sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

# Shape of the *_OPTIONS tables used by option parsers: keyword -> allowed values.
OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP node from a flat [k1, v1, k2, v2, ...] argument list.

    A single star argument (e.g. MAP(*)) produces a StarMap instead.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # Arguments alternate key, value — split them into parallel lists.
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    """Build LIKE(pattern, this[, escape]); note args arrive pattern-first."""
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    # A third argument is an ESCAPE character wrapping the LIKE node.
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a RANGE_PARSERS entry for a binary operator such as GLOB or OVERLAPS.

    The produced parser consumes the right operand and optionally swaps operands
    (reverse_args) before applying any trailing ESCAPE clause.
    """
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        # Dialects where LOG takes (expression, base) get their operands swapped.
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    # Single-argument LOG: natural log in some dialects, base-10/implicit in others.
    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    """Build HEX(arg); dialects with lowercase hex output get a LowerHex node."""
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder for JSON-extraction functions whose 2nd arg is a JSON path."""
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        # Only JSON_EXTRACT supports extra path arguments beyond the first two.
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    """Build LPAD/RPAD(this, length, fill_pattern); is_left selects the side."""
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an ARRAY/LIST constructor, recording bracket notation when the
    dialect distinguishes ARRAY[...] from ARRAY(...)."""
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build CONVERT_TIMEZONE; with two args the source timezone falls back to
    default_source_tz (when given)."""
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    """Build LTRIM/RTRIM(this[, chars]); is_left selects LEADING vs TRAILING."""
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    """Build COALESCE/IFNULL/NVL; is_nvl marks the NVL spelling for round-tripping."""
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


def build_locate_strposition(args: t.List):
    """Build LOCATE/CHARINDEX(substr, this[, position]) as a StrPosition node."""
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    """Metaclass that precomputes the SHOW/SET keyword tries for each Parser subclass."""

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        # Multi-word keys (e.g. "GLOBAL VARIABLES") are split so lookups can
        # walk the trie token by token.
        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # Builders for SQL function calls, keyed by uppercase function name.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda
args: exp.GenerateDateArray( 218 start=seq_get(args, 0), 219 end=seq_get(args, 1), 220 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 221 ), 222 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 223 "HEX": build_hex, 224 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 225 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 226 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 227 "LIKE": build_like, 228 "LOG": build_logarithm, 229 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 230 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 231 "LOWER": build_lower, 232 "LPAD": lambda args: build_pad(args), 233 "LEFTPAD": lambda args: build_pad(args), 234 "LTRIM": lambda args: build_trim(args), 235 "MOD": build_mod, 236 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 237 "RPAD": lambda args: build_pad(args, is_left=False), 238 "RTRIM": lambda args: build_trim(args, is_left=False), 239 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 240 if len(args) != 2 241 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 242 "STRPOS": exp.StrPosition.from_arg_list, 243 "CHARINDEX": lambda args: build_locate_strposition(args), 244 "INSTR": exp.StrPosition.from_arg_list, 245 "LOCATE": lambda args: build_locate_strposition(args), 246 "TIME_TO_TIME_STR": lambda args: exp.Cast( 247 this=seq_get(args, 0), 248 to=exp.DataType(this=exp.DataType.Type.TEXT), 249 ), 250 "TO_HEX": build_hex, 251 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 252 this=exp.Cast( 253 this=seq_get(args, 0), 254 to=exp.DataType(this=exp.DataType.Type.TEXT), 255 ), 256 start=exp.Literal.number(1), 257 length=exp.Literal.number(10), 258 ), 259 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 260 "UPPER": 
build_upper, 261 "VAR_MAP": build_var_map, 262 } 263 264 NO_PAREN_FUNCTIONS = { 265 TokenType.CURRENT_DATE: exp.CurrentDate, 266 TokenType.CURRENT_DATETIME: exp.CurrentDate, 267 TokenType.CURRENT_TIME: exp.CurrentTime, 268 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 269 TokenType.CURRENT_USER: exp.CurrentUser, 270 } 271 272 STRUCT_TYPE_TOKENS = { 273 TokenType.NESTED, 274 TokenType.OBJECT, 275 TokenType.STRUCT, 276 TokenType.UNION, 277 } 278 279 NESTED_TYPE_TOKENS = { 280 TokenType.ARRAY, 281 TokenType.LIST, 282 TokenType.LOWCARDINALITY, 283 TokenType.MAP, 284 TokenType.NULLABLE, 285 TokenType.RANGE, 286 *STRUCT_TYPE_TOKENS, 287 } 288 289 ENUM_TYPE_TOKENS = { 290 TokenType.DYNAMIC, 291 TokenType.ENUM, 292 TokenType.ENUM8, 293 TokenType.ENUM16, 294 } 295 296 AGGREGATE_TYPE_TOKENS = { 297 TokenType.AGGREGATEFUNCTION, 298 TokenType.SIMPLEAGGREGATEFUNCTION, 299 } 300 301 TYPE_TOKENS = { 302 TokenType.BIT, 303 TokenType.BOOLEAN, 304 TokenType.TINYINT, 305 TokenType.UTINYINT, 306 TokenType.SMALLINT, 307 TokenType.USMALLINT, 308 TokenType.INT, 309 TokenType.UINT, 310 TokenType.BIGINT, 311 TokenType.UBIGINT, 312 TokenType.INT128, 313 TokenType.UINT128, 314 TokenType.INT256, 315 TokenType.UINT256, 316 TokenType.MEDIUMINT, 317 TokenType.UMEDIUMINT, 318 TokenType.FIXEDSTRING, 319 TokenType.FLOAT, 320 TokenType.DOUBLE, 321 TokenType.UDOUBLE, 322 TokenType.CHAR, 323 TokenType.NCHAR, 324 TokenType.VARCHAR, 325 TokenType.NVARCHAR, 326 TokenType.BPCHAR, 327 TokenType.TEXT, 328 TokenType.MEDIUMTEXT, 329 TokenType.LONGTEXT, 330 TokenType.BLOB, 331 TokenType.MEDIUMBLOB, 332 TokenType.LONGBLOB, 333 TokenType.BINARY, 334 TokenType.VARBINARY, 335 TokenType.JSON, 336 TokenType.JSONB, 337 TokenType.INTERVAL, 338 TokenType.TINYBLOB, 339 TokenType.TINYTEXT, 340 TokenType.TIME, 341 TokenType.TIMETZ, 342 TokenType.TIMESTAMP, 343 TokenType.TIMESTAMP_S, 344 TokenType.TIMESTAMP_MS, 345 TokenType.TIMESTAMP_NS, 346 TokenType.TIMESTAMPTZ, 347 TokenType.TIMESTAMPLTZ, 348 
TokenType.TIMESTAMPNTZ, 349 TokenType.DATETIME, 350 TokenType.DATETIME2, 351 TokenType.DATETIME64, 352 TokenType.SMALLDATETIME, 353 TokenType.DATE, 354 TokenType.DATE32, 355 TokenType.INT4RANGE, 356 TokenType.INT4MULTIRANGE, 357 TokenType.INT8RANGE, 358 TokenType.INT8MULTIRANGE, 359 TokenType.NUMRANGE, 360 TokenType.NUMMULTIRANGE, 361 TokenType.TSRANGE, 362 TokenType.TSMULTIRANGE, 363 TokenType.TSTZRANGE, 364 TokenType.TSTZMULTIRANGE, 365 TokenType.DATERANGE, 366 TokenType.DATEMULTIRANGE, 367 TokenType.DECIMAL, 368 TokenType.DECIMAL32, 369 TokenType.DECIMAL64, 370 TokenType.DECIMAL128, 371 TokenType.DECIMAL256, 372 TokenType.UDECIMAL, 373 TokenType.BIGDECIMAL, 374 TokenType.UUID, 375 TokenType.GEOGRAPHY, 376 TokenType.GEOMETRY, 377 TokenType.POINT, 378 TokenType.RING, 379 TokenType.LINESTRING, 380 TokenType.MULTILINESTRING, 381 TokenType.POLYGON, 382 TokenType.MULTIPOLYGON, 383 TokenType.HLLSKETCH, 384 TokenType.HSTORE, 385 TokenType.PSEUDO_TYPE, 386 TokenType.SUPER, 387 TokenType.SERIAL, 388 TokenType.SMALLSERIAL, 389 TokenType.BIGSERIAL, 390 TokenType.XML, 391 TokenType.YEAR, 392 TokenType.USERDEFINED, 393 TokenType.MONEY, 394 TokenType.SMALLMONEY, 395 TokenType.ROWVERSION, 396 TokenType.IMAGE, 397 TokenType.VARIANT, 398 TokenType.VECTOR, 399 TokenType.OBJECT, 400 TokenType.OBJECT_IDENTIFIER, 401 TokenType.INET, 402 TokenType.IPADDRESS, 403 TokenType.IPPREFIX, 404 TokenType.IPV4, 405 TokenType.IPV6, 406 TokenType.UNKNOWN, 407 TokenType.NULL, 408 TokenType.NAME, 409 TokenType.TDIGEST, 410 TokenType.DYNAMIC, 411 *ENUM_TYPE_TOKENS, 412 *NESTED_TYPE_TOKENS, 413 *AGGREGATE_TYPE_TOKENS, 414 } 415 416 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 417 TokenType.BIGINT: TokenType.UBIGINT, 418 TokenType.INT: TokenType.UINT, 419 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 420 TokenType.SMALLINT: TokenType.USMALLINT, 421 TokenType.TINYINT: TokenType.UTINYINT, 422 TokenType.DECIMAL: TokenType.UDECIMAL, 423 TokenType.DOUBLE: TokenType.UDOUBLE, 424 } 425 426 SUBQUERY_PREDICATES = { 427 
TokenType.ANY: exp.Any, 428 TokenType.ALL: exp.All, 429 TokenType.EXISTS: exp.Exists, 430 TokenType.SOME: exp.Any, 431 } 432 433 RESERVED_TOKENS = { 434 *Tokenizer.SINGLE_TOKENS.values(), 435 TokenType.SELECT, 436 } - {TokenType.IDENTIFIER} 437 438 DB_CREATABLES = { 439 TokenType.DATABASE, 440 TokenType.DICTIONARY, 441 TokenType.MODEL, 442 TokenType.NAMESPACE, 443 TokenType.SCHEMA, 444 TokenType.SEQUENCE, 445 TokenType.SINK, 446 TokenType.SOURCE, 447 TokenType.STORAGE_INTEGRATION, 448 TokenType.STREAMLIT, 449 TokenType.TABLE, 450 TokenType.TAG, 451 TokenType.VIEW, 452 TokenType.WAREHOUSE, 453 } 454 455 CREATABLES = { 456 TokenType.COLUMN, 457 TokenType.CONSTRAINT, 458 TokenType.FOREIGN_KEY, 459 TokenType.FUNCTION, 460 TokenType.INDEX, 461 TokenType.PROCEDURE, 462 *DB_CREATABLES, 463 } 464 465 ALTERABLES = { 466 TokenType.INDEX, 467 TokenType.TABLE, 468 TokenType.VIEW, 469 } 470 471 # Tokens that can represent identifiers 472 ID_VAR_TOKENS = { 473 TokenType.ALL, 474 TokenType.ATTACH, 475 TokenType.VAR, 476 TokenType.ANTI, 477 TokenType.APPLY, 478 TokenType.ASC, 479 TokenType.ASOF, 480 TokenType.AUTO_INCREMENT, 481 TokenType.BEGIN, 482 TokenType.BPCHAR, 483 TokenType.CACHE, 484 TokenType.CASE, 485 TokenType.COLLATE, 486 TokenType.COMMAND, 487 TokenType.COMMENT, 488 TokenType.COMMIT, 489 TokenType.CONSTRAINT, 490 TokenType.COPY, 491 TokenType.CUBE, 492 TokenType.CURRENT_SCHEMA, 493 TokenType.DEFAULT, 494 TokenType.DELETE, 495 TokenType.DESC, 496 TokenType.DESCRIBE, 497 TokenType.DETACH, 498 TokenType.DICTIONARY, 499 TokenType.DIV, 500 TokenType.END, 501 TokenType.EXECUTE, 502 TokenType.EXPORT, 503 TokenType.ESCAPE, 504 TokenType.FALSE, 505 TokenType.FIRST, 506 TokenType.FILTER, 507 TokenType.FINAL, 508 TokenType.FORMAT, 509 TokenType.FULL, 510 TokenType.IDENTIFIER, 511 TokenType.IS, 512 TokenType.ISNULL, 513 TokenType.INTERVAL, 514 TokenType.KEEP, 515 TokenType.KILL, 516 TokenType.LEFT, 517 TokenType.LIMIT, 518 TokenType.LOAD, 519 TokenType.MERGE, 520 
TokenType.NATURAL, 521 TokenType.NEXT, 522 TokenType.OFFSET, 523 TokenType.OPERATOR, 524 TokenType.ORDINALITY, 525 TokenType.OVERLAPS, 526 TokenType.OVERWRITE, 527 TokenType.PARTITION, 528 TokenType.PERCENT, 529 TokenType.PIVOT, 530 TokenType.PRAGMA, 531 TokenType.PUT, 532 TokenType.RANGE, 533 TokenType.RECURSIVE, 534 TokenType.REFERENCES, 535 TokenType.REFRESH, 536 TokenType.RENAME, 537 TokenType.REPLACE, 538 TokenType.RIGHT, 539 TokenType.ROLLUP, 540 TokenType.ROW, 541 TokenType.ROWS, 542 TokenType.SEMI, 543 TokenType.SET, 544 TokenType.SETTINGS, 545 TokenType.SHOW, 546 TokenType.TEMPORARY, 547 TokenType.TOP, 548 TokenType.TRUE, 549 TokenType.TRUNCATE, 550 TokenType.UNIQUE, 551 TokenType.UNNEST, 552 TokenType.UNPIVOT, 553 TokenType.UPDATE, 554 TokenType.USE, 555 TokenType.VOLATILE, 556 TokenType.WINDOW, 557 *CREATABLES, 558 *SUBQUERY_PREDICATES, 559 *TYPE_TOKENS, 560 *NO_PAREN_FUNCTIONS, 561 } 562 ID_VAR_TOKENS.remove(TokenType.UNION) 563 564 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 565 TokenType.ANTI, 566 TokenType.APPLY, 567 TokenType.ASOF, 568 TokenType.FULL, 569 TokenType.LEFT, 570 TokenType.LOCK, 571 TokenType.NATURAL, 572 TokenType.RIGHT, 573 TokenType.SEMI, 574 TokenType.WINDOW, 575 } 576 577 ALIAS_TOKENS = ID_VAR_TOKENS 578 579 ARRAY_CONSTRUCTORS = { 580 "ARRAY": exp.Array, 581 "LIST": exp.List, 582 } 583 584 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 585 586 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 587 588 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 589 590 FUNC_TOKENS = { 591 TokenType.COLLATE, 592 TokenType.COMMAND, 593 TokenType.CURRENT_DATE, 594 TokenType.CURRENT_DATETIME, 595 TokenType.CURRENT_SCHEMA, 596 TokenType.CURRENT_TIMESTAMP, 597 TokenType.CURRENT_TIME, 598 TokenType.CURRENT_USER, 599 TokenType.FILTER, 600 TokenType.FIRST, 601 TokenType.FORMAT, 602 TokenType.GLOB, 603 TokenType.IDENTIFIER, 604 TokenType.INDEX, 605 TokenType.ISNULL, 606 TokenType.ILIKE, 607 TokenType.INSERT, 608 TokenType.LIKE, 609 
TokenType.MERGE, 610 TokenType.NEXT, 611 TokenType.OFFSET, 612 TokenType.PRIMARY_KEY, 613 TokenType.RANGE, 614 TokenType.REPLACE, 615 TokenType.RLIKE, 616 TokenType.ROW, 617 TokenType.UNNEST, 618 TokenType.VAR, 619 TokenType.LEFT, 620 TokenType.RIGHT, 621 TokenType.SEQUENCE, 622 TokenType.DATE, 623 TokenType.DATETIME, 624 TokenType.TABLE, 625 TokenType.TIMESTAMP, 626 TokenType.TIMESTAMPTZ, 627 TokenType.TRUNCATE, 628 TokenType.WINDOW, 629 TokenType.XOR, 630 *TYPE_TOKENS, 631 *SUBQUERY_PREDICATES, 632 } 633 634 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 635 TokenType.AND: exp.And, 636 } 637 638 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 639 TokenType.COLON_EQ: exp.PropertyEQ, 640 } 641 642 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 643 TokenType.OR: exp.Or, 644 } 645 646 EQUALITY = { 647 TokenType.EQ: exp.EQ, 648 TokenType.NEQ: exp.NEQ, 649 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 650 } 651 652 COMPARISON = { 653 TokenType.GT: exp.GT, 654 TokenType.GTE: exp.GTE, 655 TokenType.LT: exp.LT, 656 TokenType.LTE: exp.LTE, 657 } 658 659 BITWISE = { 660 TokenType.AMP: exp.BitwiseAnd, 661 TokenType.CARET: exp.BitwiseXor, 662 TokenType.PIPE: exp.BitwiseOr, 663 } 664 665 TERM = { 666 TokenType.DASH: exp.Sub, 667 TokenType.PLUS: exp.Add, 668 TokenType.MOD: exp.Mod, 669 TokenType.COLLATE: exp.Collate, 670 } 671 672 FACTOR = { 673 TokenType.DIV: exp.IntDiv, 674 TokenType.LR_ARROW: exp.Distance, 675 TokenType.SLASH: exp.Div, 676 TokenType.STAR: exp.Mul, 677 } 678 679 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 680 681 TIMES = { 682 TokenType.TIME, 683 TokenType.TIMETZ, 684 } 685 686 TIMESTAMPS = { 687 TokenType.TIMESTAMP, 688 TokenType.TIMESTAMPNTZ, 689 TokenType.TIMESTAMPTZ, 690 TokenType.TIMESTAMPLTZ, 691 *TIMES, 692 } 693 694 SET_OPERATIONS = { 695 TokenType.UNION, 696 TokenType.INTERSECT, 697 TokenType.EXCEPT, 698 } 699 700 JOIN_METHODS = { 701 TokenType.ASOF, 702 TokenType.NATURAL, 703 TokenType.POSITIONAL, 704 } 705 
706 JOIN_SIDES = { 707 TokenType.LEFT, 708 TokenType.RIGHT, 709 TokenType.FULL, 710 } 711 712 JOIN_KINDS = { 713 TokenType.ANTI, 714 TokenType.CROSS, 715 TokenType.INNER, 716 TokenType.OUTER, 717 TokenType.SEMI, 718 TokenType.STRAIGHT_JOIN, 719 } 720 721 JOIN_HINTS: t.Set[str] = set() 722 723 LAMBDAS = { 724 TokenType.ARROW: lambda self, expressions: self.expression( 725 exp.Lambda, 726 this=self._replace_lambda( 727 self._parse_assignment(), 728 expressions, 729 ), 730 expressions=expressions, 731 ), 732 TokenType.FARROW: lambda self, expressions: self.expression( 733 exp.Kwarg, 734 this=exp.var(expressions[0].name), 735 expression=self._parse_assignment(), 736 ), 737 } 738 739 COLUMN_OPERATORS = { 740 TokenType.DOT: None, 741 TokenType.DOTCOLON: lambda self, this, to: self.expression( 742 exp.JSONCast, 743 this=this, 744 to=to, 745 ), 746 TokenType.DCOLON: lambda self, this, to: self.expression( 747 exp.Cast if self.STRICT_CAST else exp.TryCast, 748 this=this, 749 to=to, 750 ), 751 TokenType.ARROW: lambda self, this, path: self.expression( 752 exp.JSONExtract, 753 this=this, 754 expression=self.dialect.to_json_path(path), 755 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 756 ), 757 TokenType.DARROW: lambda self, this, path: self.expression( 758 exp.JSONExtractScalar, 759 this=this, 760 expression=self.dialect.to_json_path(path), 761 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 762 ), 763 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 764 exp.JSONBExtract, 765 this=this, 766 expression=path, 767 ), 768 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 769 exp.JSONBExtractScalar, 770 this=this, 771 expression=path, 772 ), 773 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 774 exp.JSONBContains, 775 this=this, 776 expression=key, 777 ), 778 } 779 780 EXPRESSION_PARSERS = { 781 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 782 exp.Column: lambda self: self._parse_column(), 
783 exp.Condition: lambda self: self._parse_assignment(), 784 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 785 exp.Expression: lambda self: self._parse_expression(), 786 exp.From: lambda self: self._parse_from(joins=True), 787 exp.Group: lambda self: self._parse_group(), 788 exp.Having: lambda self: self._parse_having(), 789 exp.Hint: lambda self: self._parse_hint_body(), 790 exp.Identifier: lambda self: self._parse_id_var(), 791 exp.Join: lambda self: self._parse_join(), 792 exp.Lambda: lambda self: self._parse_lambda(), 793 exp.Lateral: lambda self: self._parse_lateral(), 794 exp.Limit: lambda self: self._parse_limit(), 795 exp.Offset: lambda self: self._parse_offset(), 796 exp.Order: lambda self: self._parse_order(), 797 exp.Ordered: lambda self: self._parse_ordered(), 798 exp.Properties: lambda self: self._parse_properties(), 799 exp.Qualify: lambda self: self._parse_qualify(), 800 exp.Returning: lambda self: self._parse_returning(), 801 exp.Select: lambda self: self._parse_select(), 802 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 803 exp.Table: lambda self: self._parse_table_parts(), 804 exp.TableAlias: lambda self: self._parse_table_alias(), 805 exp.Tuple: lambda self: self._parse_value(), 806 exp.Whens: lambda self: self._parse_when_matched(), 807 exp.Where: lambda self: self._parse_where(), 808 exp.Window: lambda self: self._parse_named_window(), 809 exp.With: lambda self: self._parse_with(), 810 "JOIN_TYPE": lambda self: self._parse_join_parts(), 811 } 812 813 STATEMENT_PARSERS = { 814 TokenType.ALTER: lambda self: self._parse_alter(), 815 TokenType.ANALYZE: lambda self: self._parse_analyze(), 816 TokenType.BEGIN: lambda self: self._parse_transaction(), 817 TokenType.CACHE: lambda self: self._parse_cache(), 818 TokenType.COMMENT: lambda self: self._parse_comment(), 819 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 820 TokenType.COPY: lambda self: self._parse_copy(), 821 
TokenType.CREATE: lambda self: self._parse_create(), 822 TokenType.DELETE: lambda self: self._parse_delete(), 823 TokenType.DESC: lambda self: self._parse_describe(), 824 TokenType.DESCRIBE: lambda self: self._parse_describe(), 825 TokenType.DROP: lambda self: self._parse_drop(), 826 TokenType.GRANT: lambda self: self._parse_grant(), 827 TokenType.INSERT: lambda self: self._parse_insert(), 828 TokenType.KILL: lambda self: self._parse_kill(), 829 TokenType.LOAD: lambda self: self._parse_load(), 830 TokenType.MERGE: lambda self: self._parse_merge(), 831 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 832 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 833 TokenType.REFRESH: lambda self: self._parse_refresh(), 834 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 835 TokenType.SET: lambda self: self._parse_set(), 836 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 837 TokenType.UNCACHE: lambda self: self._parse_uncache(), 838 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 839 TokenType.UPDATE: lambda self: self._parse_update(), 840 TokenType.USE: lambda self: self._parse_use(), 841 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 842 } 843 844 UNARY_PARSERS = { 845 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 846 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 847 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 848 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 849 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 850 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 851 } 852 853 STRING_PARSERS = { 854 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 855 exp.RawString, this=token.text 856 ), 857 
TokenType.NATIONAL_STRING: lambda self, token: self.expression( 858 exp.National, this=token.text 859 ), 860 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 861 TokenType.STRING: lambda self, token: self.expression( 862 exp.Literal, this=token.text, is_string=True 863 ), 864 TokenType.UNICODE_STRING: lambda self, token: self.expression( 865 exp.UnicodeString, 866 this=token.text, 867 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 868 ), 869 } 870 871 NUMERIC_PARSERS = { 872 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 873 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 874 TokenType.HEX_STRING: lambda self, token: self.expression( 875 exp.HexString, 876 this=token.text, 877 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 878 ), 879 TokenType.NUMBER: lambda self, token: self.expression( 880 exp.Literal, this=token.text, is_string=False 881 ), 882 } 883 884 PRIMARY_PARSERS = { 885 **STRING_PARSERS, 886 **NUMERIC_PARSERS, 887 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 888 TokenType.NULL: lambda self, _: self.expression(exp.Null), 889 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 890 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 891 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 892 TokenType.STAR: lambda self, _: self._parse_star_ops(), 893 } 894 895 PLACEHOLDER_PARSERS = { 896 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 897 TokenType.PARAMETER: lambda self: self._parse_parameter(), 898 TokenType.COLON: lambda self: ( 899 self.expression(exp.Placeholder, this=self._prev.text) 900 if self._match_set(self.ID_VAR_TOKENS) 901 else None 902 ), 903 } 904 905 RANGE_PARSERS = { 906 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 907 TokenType.BETWEEN: lambda 
self, this: self._parse_between(this), 908 TokenType.GLOB: binary_range_parser(exp.Glob), 909 TokenType.ILIKE: binary_range_parser(exp.ILike), 910 TokenType.IN: lambda self, this: self._parse_in(this), 911 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 912 TokenType.IS: lambda self, this: self._parse_is(this), 913 TokenType.LIKE: binary_range_parser(exp.Like), 914 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 915 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 916 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 917 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 918 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 919 } 920 921 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 922 "ALLOWED_VALUES": lambda self: self.expression( 923 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 924 ), 925 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 926 "AUTO": lambda self: self._parse_auto_property(), 927 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 928 "BACKUP": lambda self: self.expression( 929 exp.BackupProperty, this=self._parse_var(any_token=True) 930 ), 931 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 932 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 933 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 934 "CHECKSUM": lambda self: self._parse_checksum(), 935 "CLUSTER BY": lambda self: self._parse_cluster(), 936 "CLUSTERED": lambda self: self._parse_clustered_by(), 937 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 938 exp.CollateProperty, **kwargs 939 ), 940 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 941 "CONTAINS": lambda self: self._parse_contains_property(), 942 "COPY": lambda self: self._parse_copy_property(), 943 "DATABLOCKSIZE": lambda self, 
**kwargs: self._parse_datablocksize(**kwargs), 944 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 945 "DEFINER": lambda self: self._parse_definer(), 946 "DETERMINISTIC": lambda self: self.expression( 947 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 948 ), 949 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 950 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 951 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 952 "DISTKEY": lambda self: self._parse_distkey(), 953 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 954 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 955 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 956 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 957 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 958 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 959 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 960 "FREESPACE": lambda self: self._parse_freespace(), 961 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 962 "HEAP": lambda self: self.expression(exp.HeapProperty), 963 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 964 "IMMUTABLE": lambda self: self.expression( 965 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 966 ), 967 "INHERITS": lambda self: self.expression( 968 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 969 ), 970 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 971 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 972 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 973 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 974 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 975 "LIKE": 
lambda self: self._parse_create_like(), 976 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 977 "LOCK": lambda self: self._parse_locking(), 978 "LOCKING": lambda self: self._parse_locking(), 979 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 980 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 981 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 982 "MODIFIES": lambda self: self._parse_modifies_property(), 983 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 984 "NO": lambda self: self._parse_no_property(), 985 "ON": lambda self: self._parse_on_property(), 986 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 987 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 988 "PARTITION": lambda self: self._parse_partitioned_of(), 989 "PARTITION BY": lambda self: self._parse_partitioned_by(), 990 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 991 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 992 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 993 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 994 "READS": lambda self: self._parse_reads_property(), 995 "REMOTE": lambda self: self._parse_remote_with_connection(), 996 "RETURNS": lambda self: self._parse_returns(), 997 "STRICT": lambda self: self.expression(exp.StrictProperty), 998 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 999 "ROW": lambda self: self._parse_row(), 1000 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1001 "SAMPLE": lambda self: self.expression( 1002 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1003 ), 1004 "SECURE": lambda self: self.expression(exp.SecureProperty), 1005 "SECURITY": lambda self: self._parse_security(), 1006 "SET": lambda self: self.expression(exp.SetProperty, 
        # (closes the "SET" PROPERTY_PARSERS entry opened above)
        multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Maps a constraint keyword (already consumed) to a callable that parses the
    # remainder of the column/table constraint.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <fn> becomes OnUpdateColumnConstraint; a bare ON <id> becomes OnProperty
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # ALTER TABLE <action> dispatchers, keyed by the action keyword.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    # ALTER TABLE ... ALTER <keyword> dispatchers (dist/sort key style alterations).
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraints that may appear in a schema definition without a preceding name.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    # Functions invoked without parentheses, keyed by the leading keyword.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    # Tokens that cannot be used as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    # Functions whose arguments may carry aliases.
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Expression types treated as key/value definitions.
    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Functions with non-standard argument syntax that need a dedicated parser.
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    # Query modifier token -> (arg name on the query node, parsed modifier).
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # SET <scope keyword> dispatchers.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }
self: self._parse_set_transaction(), 1217 } 1218 1219 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1220 1221 TYPE_LITERAL_PARSERS = { 1222 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1223 } 1224 1225 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1226 1227 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1228 1229 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1230 1231 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1232 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1233 "ISOLATION": ( 1234 ("LEVEL", "REPEATABLE", "READ"), 1235 ("LEVEL", "READ", "COMMITTED"), 1236 ("LEVEL", "READ", "UNCOMITTED"), 1237 ("LEVEL", "SERIALIZABLE"), 1238 ), 1239 "READ": ("WRITE", "ONLY"), 1240 } 1241 1242 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1243 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1244 ) 1245 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1246 1247 CREATE_SEQUENCE: OPTIONS_TYPE = { 1248 "SCALE": ("EXTEND", "NOEXTEND"), 1249 "SHARD": ("EXTEND", "NOEXTEND"), 1250 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1251 **dict.fromkeys( 1252 ( 1253 "SESSION", 1254 "GLOBAL", 1255 "KEEP", 1256 "NOKEEP", 1257 "ORDER", 1258 "NOORDER", 1259 "NOCACHE", 1260 "CYCLE", 1261 "NOCYCLE", 1262 "NOMINVALUE", 1263 "NOMAXVALUE", 1264 "NOSCALE", 1265 "NOSHARD", 1266 ), 1267 tuple(), 1268 ), 1269 } 1270 1271 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1272 1273 USABLES: OPTIONS_TYPE = dict.fromkeys( 1274 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1275 ) 1276 1277 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1278 1279 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1280 "TYPE": ("EVOLUTION",), 1281 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1282 } 1283 1284 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1285 1286 
EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1287 1288 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1289 "NOT": ("ENFORCED",), 1290 "MATCH": ( 1291 "FULL", 1292 "PARTIAL", 1293 "SIMPLE", 1294 ), 1295 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1296 "USING": ( 1297 "BTREE", 1298 "HASH", 1299 ), 1300 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1301 } 1302 1303 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1304 1305 CLONE_KEYWORDS = {"CLONE", "COPY"} 1306 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1307 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1308 1309 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1310 1311 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1312 1313 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1314 1315 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1316 1317 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1318 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1319 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1320 1321 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1322 1323 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1324 1325 ADD_CONSTRAINT_TOKENS = { 1326 TokenType.CONSTRAINT, 1327 TokenType.FOREIGN_KEY, 1328 TokenType.INDEX, 1329 TokenType.KEY, 1330 TokenType.PRIMARY_KEY, 1331 TokenType.UNIQUE, 1332 } 1333 1334 DISTINCT_TOKENS = {TokenType.DISTINCT} 1335 1336 NULL_TOKENS = {TokenType.NULL} 1337 1338 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1339 1340 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1341 1342 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1343 1344 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1345 1346 ODBC_DATETIME_LITERALS = { 1347 "d": exp.Date, 1348 "t": exp.Time, 1349 
    # Keywords allowed in ON ERROR / ON EMPTY style JSON clauses.
    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    # Tokens that may follow a privilege name in a GRANT statement.
    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    # ANALYZE <keyword> dispatchers.
    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    # Dialect-populated: keywords that may modify a whole operation.
    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
SELECT 1 FROM y.z AS z, z.a (Redshift) 1435 SUPPORTS_IMPLICIT_UNNEST = False 1436 1437 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1438 INTERVAL_SPANS = True 1439 1440 # Whether a PARTITION clause can follow a table reference 1441 SUPPORTS_PARTITION_SELECTION = False 1442 1443 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1444 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1445 1446 # Whether the 'AS' keyword is optional in the CTE definition syntax 1447 OPTIONAL_ALIAS_TOKEN_CTE = True 1448 1449 __slots__ = ( 1450 "error_level", 1451 "error_message_context", 1452 "max_errors", 1453 "dialect", 1454 "sql", 1455 "errors", 1456 "_tokens", 1457 "_index", 1458 "_curr", 1459 "_next", 1460 "_prev", 1461 "_prev_comments", 1462 ) 1463 1464 # Autofilled 1465 SHOW_TRIE: t.Dict = {} 1466 SET_TRIE: t.Dict = {} 1467 1468 def __init__( 1469 self, 1470 error_level: t.Optional[ErrorLevel] = None, 1471 error_message_context: int = 100, 1472 max_errors: int = 3, 1473 dialect: DialectType = None, 1474 ): 1475 from sqlglot.dialects import Dialect 1476 1477 self.error_level = error_level or ErrorLevel.IMMEDIATE 1478 self.error_message_context = error_message_context 1479 self.max_errors = max_errors 1480 self.dialect = Dialect.get_or_raise(dialect) 1481 self.reset() 1482 1483 def reset(self): 1484 self.sql = "" 1485 self.errors = [] 1486 self._tokens = [] 1487 self._index = 0 1488 self._curr = None 1489 self._next = None 1490 self._prev = None 1491 self._prev_comments = None 1492 1493 def parse( 1494 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1495 ) -> t.List[t.Optional[exp.Expression]]: 1496 """ 1497 Parses a list of tokens and returns a list of syntax trees, one tree 1498 per parsed SQL statement. 1499 1500 Args: 1501 raw_tokens: The list of tokens. 1502 sql: The original SQL string, used to produce helpful debug messages. 1503 1504 Returns: 1505 The list of the produced syntax trees. 
1506 """ 1507 return self._parse( 1508 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1509 ) 1510 1511 def parse_into( 1512 self, 1513 expression_types: exp.IntoType, 1514 raw_tokens: t.List[Token], 1515 sql: t.Optional[str] = None, 1516 ) -> t.List[t.Optional[exp.Expression]]: 1517 """ 1518 Parses a list of tokens into a given Expression type. If a collection of Expression 1519 types is given instead, this method will try to parse the token list into each one 1520 of them, stopping at the first for which the parsing succeeds. 1521 1522 Args: 1523 expression_types: The expression type(s) to try and parse the token list into. 1524 raw_tokens: The list of tokens. 1525 sql: The original SQL string, used to produce helpful debug messages. 1526 1527 Returns: 1528 The target Expression. 1529 """ 1530 errors = [] 1531 for expression_type in ensure_list(expression_types): 1532 parser = self.EXPRESSION_PARSERS.get(expression_type) 1533 if not parser: 1534 raise TypeError(f"No parser registered for {expression_type}") 1535 1536 try: 1537 return self._parse(parser, raw_tokens, sql) 1538 except ParseError as e: 1539 e.errors[0]["into_expression"] = expression_type 1540 errors.append(e) 1541 1542 raise ParseError( 1543 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1544 errors=merge_errors(errors), 1545 ) from errors[-1] 1546 1547 def _parse( 1548 self, 1549 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1550 raw_tokens: t.List[Token], 1551 sql: t.Optional[str] = None, 1552 ) -> t.List[t.Optional[exp.Expression]]: 1553 self.reset() 1554 self.sql = sql or "" 1555 1556 total = len(raw_tokens) 1557 chunks: t.List[t.List[Token]] = [[]] 1558 1559 for i, token in enumerate(raw_tokens): 1560 if token.token_type == TokenType.SEMICOLON: 1561 if token.comments: 1562 chunks.append([token]) 1563 1564 if i < total - 1: 1565 chunks.append([]) 1566 else: 1567 chunks[-1].append(token) 1568 1569 expressions = [] 1570 1571 for 
tokens in chunks: 1572 self._index = -1 1573 self._tokens = tokens 1574 self._advance() 1575 1576 expressions.append(parse_method(self)) 1577 1578 if self._index < len(self._tokens): 1579 self.raise_error("Invalid expression / Unexpected token") 1580 1581 self.check_errors() 1582 1583 return expressions 1584 1585 def check_errors(self) -> None: 1586 """Logs or raises any found errors, depending on the chosen error level setting.""" 1587 if self.error_level == ErrorLevel.WARN: 1588 for error in self.errors: 1589 logger.error(str(error)) 1590 elif self.error_level == ErrorLevel.RAISE and self.errors: 1591 raise ParseError( 1592 concat_messages(self.errors, self.max_errors), 1593 errors=merge_errors(self.errors), 1594 ) 1595 1596 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1597 """ 1598 Appends an error in the list of recorded errors or raises it, depending on the chosen 1599 error level setting. 1600 """ 1601 token = token or self._curr or self._prev or Token.string("") 1602 start = token.start 1603 end = token.end + 1 1604 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1605 highlight = self.sql[start:end] 1606 end_context = self.sql[end : end + self.error_message_context] 1607 1608 error = ParseError.new( 1609 f"{message}. Line {token.line}, Col: {token.col}.\n" 1610 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1611 description=message, 1612 line=token.line, 1613 col=token.col, 1614 start_context=start_context, 1615 highlight=highlight, 1616 end_context=end_context, 1617 ) 1618 1619 if self.error_level == ErrorLevel.IMMEDIATE: 1620 raise error 1621 1622 self.errors.append(error) 1623 1624 def expression( 1625 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1626 ) -> E: 1627 """ 1628 Creates a new, validated Expression. 1629 1630 Args: 1631 exp_class: The expression class to instantiate. 1632 comments: An optional list of comments to attach to the expression. 
1633 kwargs: The arguments to set for the expression along with their respective values. 1634 1635 Returns: 1636 The target expression. 1637 """ 1638 instance = exp_class(**kwargs) 1639 instance.add_comments(comments) if comments else self._add_comments(instance) 1640 return self.validate_expression(instance) 1641 1642 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1643 if expression and self._prev_comments: 1644 expression.add_comments(self._prev_comments) 1645 self._prev_comments = None 1646 1647 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1648 """ 1649 Validates an Expression, making sure that all its mandatory arguments are set. 1650 1651 Args: 1652 expression: The expression to validate. 1653 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1654 1655 Returns: 1656 The validated expression. 1657 """ 1658 if self.error_level != ErrorLevel.IGNORE: 1659 for error_message in expression.error_messages(args): 1660 self.raise_error(error_message) 1661 1662 return expression 1663 1664 def _find_sql(self, start: Token, end: Token) -> str: 1665 return self.sql[start.start : end.end + 1] 1666 1667 def _is_connected(self) -> bool: 1668 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1669 1670 def _advance(self, times: int = 1) -> None: 1671 self._index += times 1672 self._curr = seq_get(self._tokens, self._index) 1673 self._next = seq_get(self._tokens, self._index + 1) 1674 1675 if self._index > 0: 1676 self._prev = self._tokens[self._index - 1] 1677 self._prev_comments = self._prev.comments 1678 else: 1679 self._prev = None 1680 self._prev_comments = None 1681 1682 def _retreat(self, index: int) -> None: 1683 if index != self._index: 1684 self._advance(index - self._index) 1685 1686 def _warn_unsupported(self) -> None: 1687 if len(self._tokens) <= 1: 1688 return 1689 1690 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1691 # interested in emitting a warning for the one being currently processed. 1692 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1693 1694 logger.warning( 1695 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1696 ) 1697 1698 def _parse_command(self) -> exp.Command: 1699 self._warn_unsupported() 1700 return self.expression( 1701 exp.Command, 1702 comments=self._prev_comments, 1703 this=self._prev.text.upper(), 1704 expression=self._parse_string(), 1705 ) 1706 1707 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1708 """ 1709 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1710 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1711 solve this by setting & resetting the parser state accordingly 1712 """ 1713 index = self._index 1714 error_level = self.error_level 1715 1716 self.error_level = ErrorLevel.IMMEDIATE 1717 try: 1718 this = parse_method() 1719 except ParseError: 1720 this = None 1721 finally: 1722 if not this or retreat: 1723 self._retreat(index) 1724 self.error_level = error_level 1725 1726 return this 1727 1728 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1729 start = self._prev 1730 exists = self._parse_exists() if allow_exists else None 1731 1732 self._match(TokenType.ON) 1733 1734 materialized = self._match_text_seq("MATERIALIZED") 1735 kind = self._match_set(self.CREATABLES) and self._prev 1736 if not kind: 1737 return self._parse_as_command(start) 1738 1739 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1740 this = self._parse_user_defined_function(kind=kind.token_type) 1741 elif kind.token_type == TokenType.TABLE: 1742 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1743 elif kind.token_type == TokenType.COLUMN: 1744 this = self._parse_column() 1745 else: 1746 this 
= self._parse_id_var() 1747 1748 self._match(TokenType.IS) 1749 1750 return self.expression( 1751 exp.Comment, 1752 this=this, 1753 kind=kind.text, 1754 expression=self._parse_string(), 1755 exists=exists, 1756 materialized=materialized, 1757 ) 1758 1759 def _parse_to_table( 1760 self, 1761 ) -> exp.ToTableProperty: 1762 table = self._parse_table_parts(schema=True) 1763 return self.expression(exp.ToTableProperty, this=table) 1764 1765 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1766 def _parse_ttl(self) -> exp.Expression: 1767 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1768 this = self._parse_bitwise() 1769 1770 if self._match_text_seq("DELETE"): 1771 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1772 if self._match_text_seq("RECOMPRESS"): 1773 return self.expression( 1774 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1775 ) 1776 if self._match_text_seq("TO", "DISK"): 1777 return self.expression( 1778 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1779 ) 1780 if self._match_text_seq("TO", "VOLUME"): 1781 return self.expression( 1782 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1783 ) 1784 1785 return this 1786 1787 expressions = self._parse_csv(_parse_ttl_action) 1788 where = self._parse_where() 1789 group = self._parse_group() 1790 1791 aggregates = None 1792 if group and self._match(TokenType.SET): 1793 aggregates = self._parse_csv(self._parse_set_item) 1794 1795 return self.expression( 1796 exp.MergeTreeTTL, 1797 expressions=expressions, 1798 where=where, 1799 group=group, 1800 aggregates=aggregates, 1801 ) 1802 1803 def _parse_statement(self) -> t.Optional[exp.Expression]: 1804 if self._curr is None: 1805 return None 1806 1807 if self._match_set(self.STATEMENT_PARSERS): 1808 comments = self._prev_comments 1809 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1810 stmt.add_comments(comments, 
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parse a DROP statement; falls back to a Command for unknown creatable kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        # Optional parenthesized type list, e.g. for DROP FUNCTION f(INT, TEXT)
        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Consume IF [NOT] EXISTS; returns a truthy value only if fully matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement; falls back to a Command when it can't be fully parsed."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        # Tri-state: True / False / None (no COLUMNSTORE clause present)
        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        # Skip the TABLE keyword of "CREATE TABLE FUNCTION ..."
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulate properties parsed at different clause positions into one node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        # Unconsumed trailing tokens (other than ')' or ',') mean we failed to
        # fully parse the statement, so fall back to a Command
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        # NOTE: this method continues past the end of this chunk.
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # Return None if the cursor didn't move, i.e. nothing was parsed
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a property that appears before the object name."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier flags that actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated property list."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/object property, dispatching to PROPERTY_PARSERS."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key=value property; rewind and try sequence options instead
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        """Parse STORED [BY | AS ...] storage clauses (Hive-style)."""
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        """Parse a field, converting unquoted identifiers to exp.Var."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse `[= | AS] <value>` into the given property expression class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into one exp.Properties node, or None."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parse Teradata's [NO] FALLBACK [PROTECTION]."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        """Parse SECURITY DEFINER | INVOKER."""
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        """Parse a SETTINGS key=value list (e.g. ClickHouse)."""
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE as a table property vs a UDF stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in
self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL's SYSTEM_VERSIONING = ON/OFF (...) property."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse DATA_DELETION = ON/OFF (FILTER_COLUMN = ..., RETENTION_PERIOD = ...)."""
        self._match(TokenType.EQ)
        # ON unless OFF is explicitly given
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        """Parse DISTRIBUTED BY HASH(...) | RANDOM [BUCKETS n | AUTO] (Doris/StarRocks style)."""
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        """Parse `... KEY (col1, col2, ...)` into the given key property class."""
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the many WITH-prefixed property forms, trying each in turn."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        """Parse a single stored-procedure option (e.g. EXECUTE AS ...)."""
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse WITH JOURNAL [TABLE] [=] <table> (Teradata)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parse [NO] LOG."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Build a JournalProperty from flags matched by the caller."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON | OFF | DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty,
on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse a CLUSTER BY expression list, optionally parenthesized."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (...) [SORTED BY (...)] INTO n BUCKETS (Hive)."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; rewinds if GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE = n [PERCENT] (Teradata)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse [NO | DEFAULT] MERGEBLOCKRATIO [= n [PERCENT]] (Teradata)."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE = n [BYTES | KBYTES | KILOBYTES] (Teradata)."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING [...]; rewinds on no match."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse Teradata's LOCKING clause (kind, target, FOR/IN, lock type, OVERRIDE)."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse PARTITION BY <exprs>, returning [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse Postgres partition bounds: IN (...), FROM ... TO ..., or WITH (MODULUS, REMAINDER)."""
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY (schema or bracketed field)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS] (Teradata)."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse CONTAINS SQL."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse MODIFIES SQL DATA."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) ->
t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON COMMIT PRESERVE/DELETE ROWS, else a generic ON <schema> property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse READS SQL DATA."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY (col) (Redshift)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse CREATE ... LIKE <table> [INCLUDING/EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY (cols) (Redshift)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse [DEFAULT] CHARACTER SET [=] <charset>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <table parts> (BigQuery remote models)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a UDF's RETURNS clause: TABLE<...>, TABLE(...), NULL ON NULL INPUT, or a type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE/DESC with optional kind, style, FORMAT and partition."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The "style" token was actually part of a dotted table name; rewind
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse Oracle-style INSERT FIRST/ALL ... [WHEN ... THEN] INTO ... SELECT."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement (including INSERT OVERWRITE/DIRECTORY/multitable forms)."""
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (Postgres) / ON DUPLICATE KEY (MySQL) clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action =
self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING clause, optionally with INTO target."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT ... (after the ROW keyword has been consumed)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse [WITH] SERDEPROPERTIES (...); rewinds fully on no match."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive's ROW FORMAT SERDE ... or ROW FORMAT DELIMITED ... clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive's LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; else raw command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        """Parse USE [<kind>] <name>."""
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table> (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS (...)] [AS <select>] (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not
self._match_texts(self.PARTITION_KEYWORDS): 3032 return None 3033 3034 return self.expression( 3035 exp.Partition, 3036 subpartition=self._prev.text.upper() == "SUBPARTITION", 3037 expressions=self._parse_wrapped_csv(self._parse_assignment), 3038 ) 3039 3040 def _parse_value(self) -> t.Optional[exp.Tuple]: 3041 def _parse_value_expression() -> t.Optional[exp.Expression]: 3042 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3043 return exp.var(self._prev.text.upper()) 3044 return self._parse_expression() 3045 3046 if self._match(TokenType.L_PAREN): 3047 expressions = self._parse_csv(_parse_value_expression) 3048 self._match_r_paren() 3049 return self.expression(exp.Tuple, expressions=expressions) 3050 3051 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3052 expression = self._parse_expression() 3053 if expression: 3054 return self.expression(exp.Tuple, expressions=[expression]) 3055 return None 3056 3057 def _parse_projections(self) -> t.List[exp.Expression]: 3058 return self._parse_expressions() 3059 3060 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3061 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3062 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3063 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3064 ) 3065 elif self._match(TokenType.FROM): 3066 from_ = self._parse_from(skip_from_token=True) 3067 # Support parentheses for duckdb FROM-first syntax 3068 select = self._parse_select() 3069 if select: 3070 select.set("from", from_) 3071 this = select 3072 else: 3073 this = exp.select("*").from_(t.cast(exp.From, from_)) 3074 else: 3075 this = ( 3076 self._parse_table() 3077 if table 3078 else self._parse_select(nested=True, parse_set_operation=False) 3079 ) 3080 3081 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3082 # in case a modifier (e.g. 
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query, including CTEs, VALUES, DESCRIBE, etc.

        Returns None when the current tokens do not start a query.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # A following DOT means "select" was used as an identifier, not a keyword
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # BigQuery's SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                # STREAM was an identifier, not a keyword — back up one token
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        """Parse the SEARCH {BREADTH|DEPTH} FIRST BY ... clause of a recursive CTE."""
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH [RECURSIVE] clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse a single CTE: alias [NOT MATERIALIZED | MATERIALIZED] AS (statement)."""
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        # Normalize a bare VALUES CTE body into SELECT * FROM (VALUES ...) AS _values
        if isinstance(cte.this, exp.Values):
            cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse a table alias, optionally with a parenthesized column list."""
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery node, parsing pivots/alias/sample that follow."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite implicit unnest joins (e.g. BigQuery "FROM t, t.arr") into
        explicit UNNEST(...) nodes, tracking visible table references."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, WHERE/GROUP/... parsers
        registered in QUERY_MODIFIER_PARSERS) to a modifiable expression."""
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT that carried an inline OFFSET is split into
                            # a separate exp.Offset node here
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        """Consume all remaining tokens and return them as a single raw-SQL hint."""
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        """Hook for dialects to customize how hint function calls are parsed."""
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        """Parse the body of a hint comment; fall back to a raw string on errors
        or leftover tokens."""
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint carried in the comment of a HINT token."""
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `skip_from_token` assumes FROM was consumed already."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES item, with an optional FINAL/RUNNING frame qualifier."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause (partitioning, ordering, measures,
        row semantics, skip behavior, pattern and DEFINE list)."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured as raw SQL by scanning to the balancing paren
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY table expressions."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            # cross_apply=False encodes OUTER APPLY; None means plain LATERAL
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, if present."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the identifier list of a USING (...) clause."""

        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single JOIN clause (including comma joins and APPLY variants)."""
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            if table:
                return self.expression(exp.Join, this=table)
            return None

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # Nested-join case: the ON/USING may belong to an outer join that
            # follows further joins, so try parsing those first and backtrack
            # if no ON/USING materializes.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an index expression optionally followed by an operator class."""
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the parameter tail of a CREATE INDEX statement."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; `index`/`anonymous` indicate the name was
        already parsed (or is absent) and only the target table follows."""
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints; None if absent."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table reference (function, identifier,
        string or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a possibly qualified table name (catalog.db.table and deeper)."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like expression: lateral, unnest, VALUES, subquery,
        ROWS FROM, or a (qualified) table name with its trailing clauses."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal-table versioning: FOR {TIMESTAMP|VERSION} AS OF /
        FROM..TO / BETWEEN..AND / CONTAINED IN / ALL."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        """Parse Snowflake AT/BEFORE time-travel clauses, retreating on no match."""
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse Snowflake CHANGES (INFORMATION => ...) with its AT/BEFORE and END."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) table expression, optionally with an alias."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if
self.dialect.UNNEST_COLUMN_ONLY: 4034 if alias.args.get("columns"): 4035 self.raise_error("Unexpected extra column alias in unnest.") 4036 4037 alias.set("columns", [alias.this]) 4038 alias.set("this", None) 4039 4040 columns = alias.args.get("columns") or [] 4041 if offset and len(expressions) < len(columns): 4042 offset = columns.pop() 4043 4044 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4045 self._match(TokenType.ALIAS) 4046 offset = self._parse_id_var( 4047 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4048 ) or exp.to_identifier("offset") 4049 4050 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4051 4052 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4053 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4054 if not is_derived and not ( 4055 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4056 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4057 ): 4058 return None 4059 4060 expressions = self._parse_csv(self._parse_value) 4061 alias = self._parse_table_alias() 4062 4063 if is_derived: 4064 self._match_r_paren() 4065 4066 return self.expression( 4067 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4068 ) 4069 4070 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4071 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4072 as_modifier and self._match_text_seq("USING", "SAMPLE") 4073 ): 4074 return None 4075 4076 bucket_numerator = None 4077 bucket_denominator = None 4078 bucket_field = None 4079 percent = None 4080 size = None 4081 seed = None 4082 4083 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4084 matched_l_paren = self._match(TokenType.L_PAREN) 4085 4086 if self.TABLESAMPLE_CSV: 4087 num = None 4088 expressions = self._parse_csv(self._parse_primary) 4089 else: 4090 expressions = None 4091 num = ( 4092 self._parse_factor() 
4093 if self._match(TokenType.NUMBER, advance=False) 4094 else self._parse_primary() or self._parse_placeholder() 4095 ) 4096 4097 if self._match_text_seq("BUCKET"): 4098 bucket_numerator = self._parse_number() 4099 self._match_text_seq("OUT", "OF") 4100 bucket_denominator = bucket_denominator = self._parse_number() 4101 self._match(TokenType.ON) 4102 bucket_field = self._parse_field() 4103 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4104 percent = num 4105 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4106 size = num 4107 else: 4108 percent = num 4109 4110 if matched_l_paren: 4111 self._match_r_paren() 4112 4113 if self._match(TokenType.L_PAREN): 4114 method = self._parse_var(upper=True) 4115 seed = self._match(TokenType.COMMA) and self._parse_number() 4116 self._match_r_paren() 4117 elif self._match_texts(("SEED", "REPEATABLE")): 4118 seed = self._parse_wrapped(self._parse_number) 4119 4120 if not method and self.DEFAULT_SAMPLING_METHOD: 4121 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4122 4123 return self.expression( 4124 exp.TableSample, 4125 expressions=expressions, 4126 method=method, 4127 bucket_numerator=bucket_numerator, 4128 bucket_denominator=bucket_denominator, 4129 bucket_field=bucket_field, 4130 percent=percent, 4131 size=size, 4132 seed=seed, 4133 ) 4134 4135 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4136 return list(iter(self._parse_pivot, None)) or None 4137 4138 def _parse_joins(self) -> t.Iterator[exp.Join]: 4139 return iter(self._parse_join, None) 4140 4141 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4142 if not self._match(TokenType.INTO): 4143 return None 4144 4145 return self.expression( 4146 exp.UnpivotColumns, 4147 this=self._match_text_seq("NAME") and self._parse_column(), 4148 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4149 ) 4150 4151 # https://duckdb.org/docs/sql/statements/pivot 4152 def 
_parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4153 def _parse_on() -> t.Optional[exp.Expression]: 4154 this = self._parse_bitwise() 4155 4156 if self._match(TokenType.IN): 4157 # PIVOT ... ON col IN (row_val1, row_val2) 4158 return self._parse_in(this) 4159 if self._match(TokenType.ALIAS, advance=False): 4160 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4161 return self._parse_alias(this) 4162 4163 return this 4164 4165 this = self._parse_table() 4166 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4167 into = self._parse_unpivot_columns() 4168 using = self._match(TokenType.USING) and self._parse_csv( 4169 lambda: self._parse_alias(self._parse_function()) 4170 ) 4171 group = self._parse_group() 4172 4173 return self.expression( 4174 exp.Pivot, 4175 this=this, 4176 expressions=expressions, 4177 using=using, 4178 group=group, 4179 unpivot=is_unpivot, 4180 into=into, 4181 ) 4182 4183 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4184 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4185 this = self._parse_select_or_expression() 4186 4187 self._match(TokenType.ALIAS) 4188 alias = self._parse_bitwise() 4189 if alias: 4190 if isinstance(alias, exp.Column) and not alias.db: 4191 alias = alias.this 4192 return self.expression(exp.PivotAlias, this=this, alias=alias) 4193 4194 return this 4195 4196 value = self._parse_column() 4197 4198 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4199 self.raise_error("Expecting IN (") 4200 4201 if self._match(TokenType.ANY): 4202 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4203 else: 4204 exprs = self._parse_csv(_parse_aliased_expression) 4205 4206 self._match_r_paren() 4207 return self.expression(exp.In, this=value, expressions=exprs) 4208 4209 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4210 index = self._index 4211 include_nulls = None 4212 4213 if self._match(TokenType.PIVOT): 4214 unpivot = False 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause attached to a table.

        Returns None (after rewinding) when the upcoming tokens don't actually
        form a pivot, e.g. when PIVOT/UNPIVOT isn't followed by `(`.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT without a paren was something else -- rewind
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            # PIVOT takes aggregations (optionally aliased), not bare columns
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        # Only attach an alias if another PIVOT/UNPIVOT doesn't immediately follow
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            pivot_field_expressions = pivot.args["field"].expressions

            # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
            if not isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                for fld in pivot_field_expressions:
                    field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                    for name in names:
                        # Dialect decides whether the aggregation alias prefixes
                        # or suffixes the pivoted value in the column name
                        if self.PREFIXED_PIVOT_COLUMNS:
                            name = f"{name}_{field_name}" if name else field_name
                        else:
                            name = f"{field_name}_{name}" if name else field_name

                        columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
4269 if not isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4270 for fld in pivot_field_expressions: 4271 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4272 for name in names: 4273 if self.PREFIXED_PIVOT_COLUMNS: 4274 name = f"{name}_{field_name}" if name else field_name 4275 else: 4276 name = f"{field_name}_{name}" if name else field_name 4277 4278 columns.append(exp.to_identifier(name)) 4279 4280 pivot.set("columns", columns) 4281 4282 return pivot 4283 4284 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4285 return [agg.alias for agg in aggregations] 4286 4287 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4288 if not skip_where_token and not self._match(TokenType.PREWHERE): 4289 return None 4290 4291 return self.expression( 4292 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4293 ) 4294 4295 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4296 if not skip_where_token and not self._match(TokenType.WHERE): 4297 return None 4298 4299 return self.expression( 4300 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4301 ) 4302 4303 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4304 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4305 return None 4306 4307 elements: t.Dict[str, t.Any] = defaultdict(list) 4308 4309 if self._match(TokenType.ALL): 4310 elements["all"] = True 4311 elif self._match(TokenType.DISTINCT): 4312 elements["all"] = False 4313 4314 while True: 4315 index = self._index 4316 4317 elements["expressions"].extend( 4318 self._parse_csv( 4319 lambda: None 4320 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4321 else self._parse_assignment() 4322 ) 4323 ) 4324 4325 before_with_index = self._index 4326 with_prefix = self._match(TokenType.WITH) 4327 4328 if 
self._match(TokenType.ROLLUP): 4329 elements["rollup"].append( 4330 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4331 ) 4332 elif self._match(TokenType.CUBE): 4333 elements["cube"].append( 4334 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4335 ) 4336 elif self._match(TokenType.GROUPING_SETS): 4337 elements["grouping_sets"].append( 4338 self.expression( 4339 exp.GroupingSets, 4340 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4341 ) 4342 ) 4343 elif self._match_text_seq("TOTALS"): 4344 elements["totals"] = True # type: ignore 4345 4346 if before_with_index <= self._index <= before_with_index + 1: 4347 self._retreat(before_with_index) 4348 break 4349 4350 if index == self._index: 4351 break 4352 4353 return self.expression(exp.Group, **elements) # type: ignore 4354 4355 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4356 return self.expression( 4357 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4358 ) 4359 4360 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4361 if self._match(TokenType.L_PAREN): 4362 grouping_set = self._parse_csv(self._parse_column) 4363 self._match_r_paren() 4364 return self.expression(exp.Tuple, expressions=grouping_set) 4365 4366 return self._parse_column() 4367 4368 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4369 if not skip_having_token and not self._match(TokenType.HAVING): 4370 return None 4371 return self.expression(exp.Having, this=self._parse_assignment()) 4372 4373 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4374 if not self._match(TokenType.QUALIFY): 4375 return None 4376 return self.expression(exp.Qualify, this=self._parse_assignment()) 4377 4378 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4379 if skip_start_token: 4380 start = None 4381 elif self._match(TokenType.START_WITH): 4382 start = 
self._parse_assignment() 4383 else: 4384 return None 4385 4386 self._match(TokenType.CONNECT_BY) 4387 nocycle = self._match_text_seq("NOCYCLE") 4388 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4389 exp.Prior, this=self._parse_bitwise() 4390 ) 4391 connect = self._parse_assignment() 4392 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4393 4394 if not start and self._match(TokenType.START_WITH): 4395 start = self._parse_assignment() 4396 4397 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4398 4399 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4400 this = self._parse_id_var(any_token=True) 4401 if self._match(TokenType.ALIAS): 4402 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4403 return this 4404 4405 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4406 if self._match_text_seq("INTERPOLATE"): 4407 return self._parse_wrapped_csv(self._parse_name_as_expression) 4408 return None 4409 4410 def _parse_order( 4411 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4412 ) -> t.Optional[exp.Expression]: 4413 siblings = None 4414 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4415 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4416 return this 4417 4418 siblings = True 4419 4420 return self.expression( 4421 exp.Order, 4422 this=this, 4423 expressions=self._parse_csv(self._parse_ordered), 4424 siblings=siblings, 4425 ) 4426 4427 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4428 if not self._match(token): 4429 return None 4430 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4431 4432 def _parse_ordered( 4433 self, parse_method: t.Optional[t.Callable] = None 4434 ) -> t.Optional[exp.Ordered]: 4435 this = parse_method() if parse_method else self._parse_assignment() 4436 if not this: 4437 return None 4438 4439 if this.name.upper() 
== "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4440 this = exp.var("ALL") 4441 4442 asc = self._match(TokenType.ASC) 4443 desc = self._match(TokenType.DESC) or (asc and False) 4444 4445 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4446 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4447 4448 nulls_first = is_nulls_first or False 4449 explicitly_null_ordered = is_nulls_first or is_nulls_last 4450 4451 if ( 4452 not explicitly_null_ordered 4453 and ( 4454 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4455 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4456 ) 4457 and self.dialect.NULL_ORDERING != "nulls_are_last" 4458 ): 4459 nulls_first = True 4460 4461 if self._match_text_seq("WITH", "FILL"): 4462 with_fill = self.expression( 4463 exp.WithFill, 4464 **{ # type: ignore 4465 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4466 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4467 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4468 "interpolate": self._parse_interpolate(), 4469 }, 4470 ) 4471 else: 4472 with_fill = None 4473 4474 return self.expression( 4475 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4476 ) 4477 4478 def _parse_limit_options(self) -> exp.LimitOptions: 4479 percent = self._match(TokenType.PERCENT) 4480 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4481 self._match_text_seq("ONLY") 4482 with_ties = self._match_text_seq("WITH", "TIES") 4483 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4484 4485 def _parse_limit( 4486 self, 4487 this: t.Optional[exp.Expression] = None, 4488 top: bool = False, 4489 skip_limit_token: bool = False, 4490 ) -> t.Optional[exp.Expression]: 4491 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4492 comments = self._prev_comments 4493 if top: 4494 limit_paren = self._match(TokenType.L_PAREN) 4495 expression = 
self._parse_term() if limit_paren else self._parse_number() 4496 4497 if limit_paren: 4498 self._match_r_paren() 4499 4500 limit_options = self._parse_limit_options() 4501 else: 4502 limit_options = None 4503 expression = self._parse_term() 4504 4505 if self._match(TokenType.COMMA): 4506 offset = expression 4507 expression = self._parse_term() 4508 else: 4509 offset = None 4510 4511 limit_exp = self.expression( 4512 exp.Limit, 4513 this=this, 4514 expression=expression, 4515 offset=offset, 4516 comments=comments, 4517 limit_options=limit_options, 4518 expressions=self._parse_limit_by(), 4519 ) 4520 4521 return limit_exp 4522 4523 if self._match(TokenType.FETCH): 4524 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4525 direction = self._prev.text.upper() if direction else "FIRST" 4526 4527 count = self._parse_field(tokens=self.FETCH_TOKENS) 4528 4529 return self.expression( 4530 exp.Fetch, 4531 direction=direction, 4532 count=count, 4533 limit_options=self._parse_limit_options(), 4534 ) 4535 4536 return this 4537 4538 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4539 if not self._match(TokenType.OFFSET): 4540 return this 4541 4542 count = self._parse_term() 4543 self._match_set((TokenType.ROW, TokenType.ROWS)) 4544 4545 return self.expression( 4546 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4547 ) 4548 4549 def _can_parse_limit_or_offset(self) -> bool: 4550 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4551 return False 4552 4553 index = self._index 4554 result = bool( 4555 self._try_parse(self._parse_limit, retreat=True) 4556 or self._try_parse(self._parse_offset, retreat=True) 4557 ) 4558 self._retreat(index) 4559 return result 4560 4561 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4562 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4563 4564 def _parse_locks(self) -> t.List[exp.Lock]: 4565 locks 
= [] 4566 while True: 4567 if self._match_text_seq("FOR", "UPDATE"): 4568 update = True 4569 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4570 "LOCK", "IN", "SHARE", "MODE" 4571 ): 4572 update = False 4573 else: 4574 break 4575 4576 expressions = None 4577 if self._match_text_seq("OF"): 4578 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4579 4580 wait: t.Optional[bool | exp.Expression] = None 4581 if self._match_text_seq("NOWAIT"): 4582 wait = True 4583 elif self._match_text_seq("WAIT"): 4584 wait = self._parse_primary() 4585 elif self._match_text_seq("SKIP", "LOCKED"): 4586 wait = False 4587 4588 locks.append( 4589 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4590 ) 4591 4592 return locks 4593 4594 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4595 while this and self._match_set(self.SET_OPERATIONS): 4596 token_type = self._prev.token_type 4597 4598 if token_type == TokenType.UNION: 4599 operation: t.Type[exp.SetOperation] = exp.Union 4600 elif token_type == TokenType.EXCEPT: 4601 operation = exp.Except 4602 else: 4603 operation = exp.Intersect 4604 4605 comments = self._prev.comments 4606 4607 if self._match(TokenType.DISTINCT): 4608 distinct: t.Optional[bool] = True 4609 elif self._match(TokenType.ALL): 4610 distinct = False 4611 else: 4612 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4613 if distinct is None: 4614 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4615 4616 by_name = self._match_text_seq("BY", "NAME") 4617 expression = self._parse_select(nested=True, parse_set_operation=False) 4618 4619 this = self.expression( 4620 operation, 4621 comments=comments, 4622 this=this, 4623 distinct=distinct, 4624 by_name=by_name, 4625 expression=expression, 4626 ) 4627 4628 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4629 expression = this.expression 4630 4631 if 
    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse an (optionally chained) assignment expression, e.g. `x := <expr>`.

        Falls back to a plain disjunction when no assignment operator follows.
        """
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            # A single-part column on the LHS is really just its identifier
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            # Right-associative: recurse for the RHS so `a := b := c` nests as a := (b := c)
            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the predicate following an IS token (already consumed by the caller).

        Handles IS [NOT] DISTINCT FROM, IS [NOT] JSON ..., and IS [NOT] <primary/NULL>.
        Returns None (after rewinding past the IS token) when nothing valid follows.
        """
        # Rewind target includes the IS token the caller consumed
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM is null-safe equality; IS DISTINCT FROM the inverse
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            # SQL/JSON predicate: IS [NOT] JSON [VALUE|ARRAY|OBJECT|SCALAR]
            #                     [WITH|WITHOUT] [UNIQUE] [KEYS]
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
this=this, unnest=unnest) 4744 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4745 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4746 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4747 4748 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4749 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4750 else: 4751 this = self.expression(exp.In, this=this, expressions=expressions) 4752 4753 if matched_l_paren: 4754 self._match_r_paren(this) 4755 elif not self._match(TokenType.R_BRACKET, expression=this): 4756 self.raise_error("Expecting ]") 4757 else: 4758 this = self.expression(exp.In, this=this, field=self._parse_column()) 4759 4760 return this 4761 4762 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4763 low = self._parse_bitwise() 4764 self._match(TokenType.AND) 4765 high = self._parse_bitwise() 4766 return self.expression(exp.Between, this=this, low=low, high=high) 4767 4768 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4769 if not self._match(TokenType.ESCAPE): 4770 return this 4771 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4772 4773 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4774 index = self._index 4775 4776 if not self._match(TokenType.INTERVAL) and match_interval: 4777 return None 4778 4779 if self._match(TokenType.STRING, advance=False): 4780 this = self._parse_primary() 4781 else: 4782 this = self._parse_term() 4783 4784 if not this or ( 4785 isinstance(this, exp.Column) 4786 and not this.table 4787 and not this.this.quoted 4788 and this.name.upper() == "IS" 4789 ): 4790 self._retreat(index) 4791 return None 4792 4793 unit = self._parse_function() or ( 4794 not self._match(TokenType.ALIAS, advance=False) 4795 and self._parse_var(any_token=True, upper=True) 4796 ) 4797 4798 # Most 
    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression, canonicalizing it to `INTERVAL '<n>' <unit>`.

        Args:
            match_interval: when False, the INTERVAL keyword is optional (used when
                parsing the continuation of a sum of intervals).

        Returns:
            An `Interval`, an `Add` of chained intervals, or None (after rewinding)
            if no interval could be parsed.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # Bail out on a bare `IS` following INTERVAL -- it belongs to an IS predicate
        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        # e.g. Oracle/Postgres INTERVAL ... DAY TO SECOND
        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
TokenType.LT): 4853 this = self.expression( 4854 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4855 ) 4856 elif self._match_pair(TokenType.GT, TokenType.GT): 4857 this = self.expression( 4858 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4859 ) 4860 else: 4861 break 4862 4863 return this 4864 4865 def _parse_term(self) -> t.Optional[exp.Expression]: 4866 this = self._parse_factor() 4867 4868 while self._match_set(self.TERM): 4869 klass = self.TERM[self._prev.token_type] 4870 comments = self._prev_comments 4871 expression = self._parse_factor() 4872 4873 this = self.expression(klass, this=this, comments=comments, expression=expression) 4874 4875 if isinstance(this, exp.Collate): 4876 expr = this.expression 4877 4878 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4879 # fallback to Identifier / Var 4880 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4881 ident = expr.this 4882 if isinstance(ident, exp.Identifier): 4883 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4884 4885 return this 4886 4887 def _parse_factor(self) -> t.Optional[exp.Expression]: 4888 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4889 this = parse_method() 4890 4891 while self._match_set(self.FACTOR): 4892 klass = self.FACTOR[self._prev.token_type] 4893 comments = self._prev_comments 4894 expression = parse_method() 4895 4896 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4897 self._retreat(self._index - 1) 4898 return this 4899 4900 this = self.expression(klass, this=this, comments=comments, expression=expression) 4901 4902 if isinstance(this, exp.Div): 4903 this.args["typed"] = self.dialect.TYPED_DIVISION 4904 this.args["safe"] = self.dialect.SAFE_DIVISION 4905 4906 return this 4907 4908 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4909 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4910 4911 def 
    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a typed literal/cast, or fall back to a column.

        Args:
            parse_interval: whether an INTERVAL expression may be parsed first.
            fallback_to_identifier: when no type matches, parse a bare identifier
                instead of a column.
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                # Typed literal, e.g. DATE '2024-01-01' -- dialect hook first,
                # otherwise canonicalize to CAST(<literal> AS <type>)
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)
For example, Snowflake converts DECIMAL to 4953 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4954 # 4955 # In these cases, we don't really want to return the converted type, but instead retreat 4956 # and try to parse a Column or Identifier in the section below. 4957 if data_type.expressions and index2 - index > 1: 4958 self._retreat(index2) 4959 return self._parse_column_ops(data_type) 4960 4961 self._retreat(index) 4962 4963 if fallback_to_identifier: 4964 return self._parse_id_var() 4965 4966 this = self._parse_column() 4967 return this and self._parse_column_ops(this) 4968 4969 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4970 this = self._parse_type() 4971 if not this: 4972 return None 4973 4974 if isinstance(this, exp.Column) and not this.table: 4975 this = exp.var(this.name.upper()) 4976 4977 return self.expression( 4978 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4979 ) 4980 4981 def _parse_types( 4982 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4983 ) -> t.Optional[exp.Expression]: 4984 index = self._index 4985 4986 this: t.Optional[exp.Expression] = None 4987 prefix = self._match_text_seq("SYSUDTLIB", ".") 4988 4989 if not self._match_set(self.TYPE_TOKENS): 4990 identifier = allow_identifiers and self._parse_id_var( 4991 any_token=False, tokens=(TokenType.VAR,) 4992 ) 4993 if isinstance(identifier, exp.Identifier): 4994 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4995 4996 if len(tokens) != 1: 4997 self.raise_error("Unexpected identifier", self._prev) 4998 4999 if tokens[0].token_type in self.TYPE_TOKENS: 5000 self._prev = tokens[0] 5001 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5002 type_name = identifier.name 5003 5004 while self._match(TokenType.DOT): 5005 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5006 5007 this = exp.DataType.build(type_name, udt=True) 5008 else: 5009 
self._retreat(self._index - 1) 5010 return None 5011 else: 5012 return None 5013 5014 type_token = self._prev.token_type 5015 5016 if type_token == TokenType.PSEUDO_TYPE: 5017 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5018 5019 if type_token == TokenType.OBJECT_IDENTIFIER: 5020 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5021 5022 # https://materialize.com/docs/sql/types/map/ 5023 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5024 key_type = self._parse_types( 5025 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5026 ) 5027 if not self._match(TokenType.FARROW): 5028 self._retreat(index) 5029 return None 5030 5031 value_type = self._parse_types( 5032 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5033 ) 5034 if not self._match(TokenType.R_BRACKET): 5035 self._retreat(index) 5036 return None 5037 5038 return exp.DataType( 5039 this=exp.DataType.Type.MAP, 5040 expressions=[key_type, value_type], 5041 nested=True, 5042 prefix=prefix, 5043 ) 5044 5045 nested = type_token in self.NESTED_TYPE_TOKENS 5046 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5047 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5048 expressions = None 5049 maybe_func = False 5050 5051 if self._match(TokenType.L_PAREN): 5052 if is_struct: 5053 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5054 elif nested: 5055 expressions = self._parse_csv( 5056 lambda: self._parse_types( 5057 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5058 ) 5059 ) 5060 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5061 this = expressions[0] 5062 this.set("nullable", True) 5063 self._match_r_paren() 5064 return this 5065 elif type_token in self.ENUM_TYPE_TOKENS: 5066 expressions = self._parse_csv(self._parse_equality) 5067 elif is_aggregate: 5068 func_or_ident = self._parse_function(anonymous=True) or 
self._parse_id_var( 5069 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5070 ) 5071 if not func_or_ident: 5072 return None 5073 expressions = [func_or_ident] 5074 if self._match(TokenType.COMMA): 5075 expressions.extend( 5076 self._parse_csv( 5077 lambda: self._parse_types( 5078 check_func=check_func, 5079 schema=schema, 5080 allow_identifiers=allow_identifiers, 5081 ) 5082 ) 5083 ) 5084 else: 5085 expressions = self._parse_csv(self._parse_type_size) 5086 5087 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5088 if type_token == TokenType.VECTOR and len(expressions) == 2: 5089 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5090 5091 if not expressions or not self._match(TokenType.R_PAREN): 5092 self._retreat(index) 5093 return None 5094 5095 maybe_func = True 5096 5097 values: t.Optional[t.List[exp.Expression]] = None 5098 5099 if nested and self._match(TokenType.LT): 5100 if is_struct: 5101 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5102 else: 5103 expressions = self._parse_csv( 5104 lambda: self._parse_types( 5105 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5106 ) 5107 ) 5108 5109 if not self._match(TokenType.GT): 5110 self.raise_error("Expecting >") 5111 5112 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5113 values = self._parse_csv(self._parse_assignment) 5114 if not values and is_struct: 5115 values = None 5116 self._retreat(self._index - 1) 5117 else: 5118 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5119 5120 if type_token in self.TIMESTAMPS: 5121 if self._match_text_seq("WITH", "TIME", "ZONE"): 5122 maybe_func = False 5123 tz_type = ( 5124 exp.DataType.Type.TIMETZ 5125 if type_token in self.TIMES 5126 else exp.DataType.Type.TIMESTAMPTZ 5127 ) 5128 this = exp.DataType(this=tz_type, expressions=expressions) 5129 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5130 maybe_func = False 5131 this 
= exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5132 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5133 maybe_func = False 5134 elif type_token == TokenType.INTERVAL: 5135 unit = self._parse_var(upper=True) 5136 if unit: 5137 if self._match_text_seq("TO"): 5138 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5139 5140 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5141 else: 5142 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5143 5144 if maybe_func and check_func: 5145 index2 = self._index 5146 peek = self._parse_string() 5147 5148 if not peek: 5149 self._retreat(index) 5150 return None 5151 5152 self._retreat(index2) 5153 5154 if not this: 5155 if self._match_text_seq("UNSIGNED"): 5156 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5157 if not unsigned_type_token: 5158 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5159 5160 type_token = unsigned_type_token or type_token 5161 5162 this = exp.DataType( 5163 this=exp.DataType.Type[type_token.value], 5164 expressions=expressions, 5165 nested=nested, 5166 prefix=prefix, 5167 ) 5168 5169 # Empty arrays/structs are allowed 5170 if values is not None: 5171 cls = exp.Struct if is_struct else exp.Array 5172 this = exp.cast(cls(expressions=values), this, copy=False) 5173 5174 elif expressions: 5175 this.set("expressions", expressions) 5176 5177 # https://materialize.com/docs/sql/types/list/#type-name 5178 while self._match(TokenType.LIST): 5179 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5180 5181 index = self._index 5182 5183 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5184 matched_array = self._match(TokenType.ARRAY) 5185 5186 while self._curr: 5187 datatype_token = self._prev.token_type 5188 matched_l_bracket = self._match(TokenType.L_BRACKET) 5189 5190 if (not matched_l_bracket and not 
matched_array) or ( 5191 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5192 ): 5193 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5194 # not to be confused with the fixed size array parsing 5195 break 5196 5197 matched_array = False 5198 values = self._parse_csv(self._parse_assignment) or None 5199 if ( 5200 values 5201 and not schema 5202 and ( 5203 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5204 ) 5205 ): 5206 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5207 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5208 self._retreat(index) 5209 break 5210 5211 this = exp.DataType( 5212 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5213 ) 5214 self._match(TokenType.R_BRACKET) 5215 5216 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5217 converter = self.TYPE_CONVERTERS.get(this.this) 5218 if converter: 5219 this = converter(t.cast(exp.DataType, this)) 5220 5221 return this 5222 5223 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5224 index = self._index 5225 5226 if ( 5227 self._curr 5228 and self._next 5229 and self._curr.token_type in self.TYPE_TOKENS 5230 and self._next.token_type in self.TYPE_TOKENS 5231 ): 5232 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5233 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5234 this = self._parse_id_var() 5235 else: 5236 this = ( 5237 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5238 or self._parse_id_var() 5239 ) 5240 5241 self._match(TokenType.COLON) 5242 5243 if ( 5244 type_required 5245 and not isinstance(this, exp.DataType) 5246 and not self._match_set(self.TYPE_TOKENS, advance=False) 5247 ): 5248 self._retreat(index) 5249 return self._parse_types() 5250 5251 return self._parse_column_def(this) 5252 5253 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5254 if not self._match_text_seq("AT", "TIME", "ZONE"): 5255 return this 5256 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5257 5258 def _parse_column(self) -> t.Optional[exp.Expression]: 5259 this = self._parse_column_reference() 5260 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5261 5262 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5263 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5264 5265 return column 5266 5267 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5268 this = self._parse_field() 5269 if ( 5270 not this 5271 and self._match(TokenType.VALUES, advance=False) 5272 and self.VALUES_FOLLOWED_BY_PAREN 5273 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5274 ): 5275 this = self._parse_id_var() 5276 5277 if isinstance(this, exp.Identifier): 5278 # We bubble up comments from the Identifier to the Column 5279 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5280 5281 return this 5282 5283 def _parse_colon_as_variant_extract( 5284 self, this: t.Optional[exp.Expression] 5285 ) -> t.Optional[exp.Expression]: 5286 casts = [] 5287 json_path = [] 5288 escape = None 5289 5290 while self._match(TokenType.COLON): 5291 start_index = self._index 5292 5293 # Snowflake allows reserved keywords as 
json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5294 path = self._parse_column_ops( 5295 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5296 ) 5297 5298 # The cast :: operator has a lower precedence than the extraction operator :, so 5299 # we rearrange the AST appropriately to avoid casting the JSON path 5300 while isinstance(path, exp.Cast): 5301 casts.append(path.to) 5302 path = path.this 5303 5304 if casts: 5305 dcolon_offset = next( 5306 i 5307 for i, t in enumerate(self._tokens[start_index:]) 5308 if t.token_type == TokenType.DCOLON 5309 ) 5310 end_token = self._tokens[start_index + dcolon_offset - 1] 5311 else: 5312 end_token = self._prev 5313 5314 if path: 5315 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5316 # it'll roundtrip to a string literal in GET_PATH 5317 if isinstance(path, exp.Identifier) and path.quoted: 5318 escape = True 5319 5320 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5321 5322 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5323 # Databricks transforms it back to the colon/dot notation 5324 if json_path: 5325 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5326 5327 if json_path_expr: 5328 json_path_expr.set("escape", escape) 5329 5330 this = self.expression( 5331 exp.JSONExtract, 5332 this=this, 5333 expression=json_path_expr, 5334 variant_extract=True, 5335 ) 5336 5337 while casts: 5338 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5339 5340 return this 5341 5342 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5343 return self._parse_types() 5344 5345 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5346 this = self._parse_bracket(this) 5347 5348 while self._match_set(self.COLUMN_OPERATORS): 5349 op_token = self._prev.token_type 5350 op = 
self.COLUMN_OPERATORS.get(op_token) 5351 5352 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5353 field = self._parse_dcolon() 5354 if not field: 5355 self.raise_error("Expected type") 5356 elif op and self._curr: 5357 field = self._parse_column_reference() or self._parse_bracket() 5358 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5359 field = self._parse_column_ops(field) 5360 else: 5361 field = self._parse_field(any_token=True, anonymous_func=True) 5362 5363 if isinstance(field, (exp.Func, exp.Window)) and this: 5364 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5365 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5366 this = exp.replace_tree( 5367 this, 5368 lambda n: ( 5369 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5370 if n.table 5371 else n.this 5372 ) 5373 if isinstance(n, exp.Column) 5374 else n, 5375 ) 5376 5377 if op: 5378 this = op(self, this, field) 5379 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5380 this = self.expression( 5381 exp.Column, 5382 comments=this.comments, 5383 this=field, 5384 table=this.this, 5385 db=this.args.get("table"), 5386 catalog=this.args.get("db"), 5387 ) 5388 elif isinstance(field, exp.Window): 5389 # Move the exp.Dot's to the window's function 5390 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5391 field.set("this", window_func) 5392 this = field 5393 else: 5394 this = self.expression(exp.Dot, this=this, expression=field) 5395 5396 if field and field.comments: 5397 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5398 5399 this = self._parse_bracket(this) 5400 5401 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5402 5403 def _parse_primary(self) -> t.Optional[exp.Expression]: 5404 if self._match_set(self.PRIMARY_PARSERS): 5405 token_type = 
self._prev.token_type 5406 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5407 5408 if token_type == TokenType.STRING: 5409 expressions = [primary] 5410 while self._match(TokenType.STRING): 5411 expressions.append(exp.Literal.string(self._prev.text)) 5412 5413 if len(expressions) > 1: 5414 return self.expression(exp.Concat, expressions=expressions) 5415 5416 return primary 5417 5418 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5419 return exp.Literal.number(f"0.{self._prev.text}") 5420 5421 if self._match(TokenType.L_PAREN): 5422 comments = self._prev_comments 5423 query = self._parse_select() 5424 5425 if query: 5426 expressions = [query] 5427 else: 5428 expressions = self._parse_expressions() 5429 5430 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5431 5432 if not this and self._match(TokenType.R_PAREN, advance=False): 5433 this = self.expression(exp.Tuple) 5434 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5435 this = self._parse_subquery(this=this, parse_alias=False) 5436 elif isinstance(this, exp.Subquery): 5437 this = self._parse_subquery( 5438 this=self._parse_set_operations(this), parse_alias=False 5439 ) 5440 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5441 this = self.expression(exp.Tuple, expressions=expressions) 5442 else: 5443 this = self.expression(exp.Paren, this=this) 5444 5445 if this: 5446 this.add_comments(comments) 5447 5448 self._match_r_paren(expression=this) 5449 return this 5450 5451 return None 5452 5453 def _parse_field( 5454 self, 5455 any_token: bool = False, 5456 tokens: t.Optional[t.Collection[TokenType]] = None, 5457 anonymous_func: bool = False, 5458 ) -> t.Optional[exp.Expression]: 5459 if anonymous_func: 5460 field = ( 5461 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5462 or self._parse_primary() 5463 ) 5464 else: 5465 field = self._parse_primary() or self._parse_function( 5466 anonymous=anonymous_func, any_token=any_token 5467 ) 5468 return 
field or self._parse_id_var(any_token=any_token, tokens=tokens) 5469 5470 def _parse_function( 5471 self, 5472 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5473 anonymous: bool = False, 5474 optional_parens: bool = True, 5475 any_token: bool = False, 5476 ) -> t.Optional[exp.Expression]: 5477 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5478 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5479 fn_syntax = False 5480 if ( 5481 self._match(TokenType.L_BRACE, advance=False) 5482 and self._next 5483 and self._next.text.upper() == "FN" 5484 ): 5485 self._advance(2) 5486 fn_syntax = True 5487 5488 func = self._parse_function_call( 5489 functions=functions, 5490 anonymous=anonymous, 5491 optional_parens=optional_parens, 5492 any_token=any_token, 5493 ) 5494 5495 if fn_syntax: 5496 self._match(TokenType.R_BRACE) 5497 5498 return func 5499 5500 def _parse_function_call( 5501 self, 5502 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5503 anonymous: bool = False, 5504 optional_parens: bool = True, 5505 any_token: bool = False, 5506 ) -> t.Optional[exp.Expression]: 5507 if not self._curr: 5508 return None 5509 5510 comments = self._curr.comments 5511 token_type = self._curr.token_type 5512 this = self._curr.text 5513 upper = this.upper() 5514 5515 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5516 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5517 self._advance() 5518 return self._parse_window(parser(self)) 5519 5520 if not self._next or self._next.token_type != TokenType.L_PAREN: 5521 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5522 self._advance() 5523 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5524 5525 return None 5526 5527 if any_token: 5528 if token_type in self.RESERVED_TOKENS: 5529 return None 5530 elif token_type not in self.FUNC_TOKENS: 5531 return None 5532 5533 self._advance(2) 5534 5535 parser = 
self.FUNCTION_PARSERS.get(upper) 5536 if parser and not anonymous: 5537 this = parser(self) 5538 else: 5539 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5540 5541 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5542 this = self.expression( 5543 subquery_predicate, comments=comments, this=self._parse_select() 5544 ) 5545 self._match_r_paren() 5546 return this 5547 5548 if functions is None: 5549 functions = self.FUNCTIONS 5550 5551 function = functions.get(upper) 5552 known_function = function and not anonymous 5553 5554 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5555 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5556 5557 post_func_comments = self._curr and self._curr.comments 5558 if known_function and post_func_comments: 5559 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5560 # call we'll construct it as exp.Anonymous, even if it's "known" 5561 if any( 5562 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5563 for comment in post_func_comments 5564 ): 5565 known_function = False 5566 5567 if alias and known_function: 5568 args = self._kv_to_prop_eq(args) 5569 5570 if known_function: 5571 func_builder = t.cast(t.Callable, function) 5572 5573 if "dialect" in func_builder.__code__.co_varnames: 5574 func = func_builder(args, dialect=self.dialect) 5575 else: 5576 func = func_builder(args) 5577 5578 func = self.validate_expression(func, args) 5579 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5580 func.meta["name"] = this 5581 5582 this = func 5583 else: 5584 if token_type == TokenType.IDENTIFIER: 5585 this = exp.Identifier(this=this, quoted=True) 5586 this = self.expression(exp.Anonymous, this=this, expressions=args) 5587 5588 if isinstance(this, exp.Expression): 5589 this.add_comments(comments) 5590 5591 self._match_r_paren(this) 5592 return self._parse_window(this) 5593 5594 def _to_prop_eq(self, expression: exp.Expression, index: 
int) -> exp.Expression: 5595 return expression 5596 5597 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5598 transformed = [] 5599 5600 for index, e in enumerate(expressions): 5601 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5602 if isinstance(e, exp.Alias): 5603 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5604 5605 if not isinstance(e, exp.PropertyEQ): 5606 e = self.expression( 5607 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5608 ) 5609 5610 if isinstance(e.this, exp.Column): 5611 e.this.replace(e.this.this) 5612 else: 5613 e = self._to_prop_eq(e, index) 5614 5615 transformed.append(e) 5616 5617 return transformed 5618 5619 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5620 return self._parse_statement() 5621 5622 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5623 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5624 5625 def _parse_user_defined_function( 5626 self, kind: t.Optional[TokenType] = None 5627 ) -> t.Optional[exp.Expression]: 5628 this = self._parse_table_parts(schema=True) 5629 5630 if not self._match(TokenType.L_PAREN): 5631 return this 5632 5633 expressions = self._parse_csv(self._parse_function_parameter) 5634 self._match_r_paren() 5635 return self.expression( 5636 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5637 ) 5638 5639 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5640 literal = self._parse_primary() 5641 if literal: 5642 return self.expression(exp.Introducer, this=token.text, expression=literal) 5643 5644 return self.expression(exp.Identifier, this=token.text) 5645 5646 def _parse_session_parameter(self) -> exp.SessionParameter: 5647 kind = None 5648 this = self._parse_id_var() or self._parse_primary() 5649 5650 if this and self._match(TokenType.DOT): 5651 kind = this.name 5652 this 
= self._parse_var() or self._parse_primary() 5653 5654 return self.expression(exp.SessionParameter, this=this, kind=kind) 5655 5656 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5657 return self._parse_id_var() 5658 5659 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5660 index = self._index 5661 5662 if self._match(TokenType.L_PAREN): 5663 expressions = t.cast( 5664 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5665 ) 5666 5667 if not self._match(TokenType.R_PAREN): 5668 self._retreat(index) 5669 else: 5670 expressions = [self._parse_lambda_arg()] 5671 5672 if self._match_set(self.LAMBDAS): 5673 return self.LAMBDAS[self._prev.token_type](self, expressions) 5674 5675 self._retreat(index) 5676 5677 this: t.Optional[exp.Expression] 5678 5679 if self._match(TokenType.DISTINCT): 5680 this = self.expression( 5681 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5682 ) 5683 else: 5684 this = self._parse_select_or_expression(alias=alias) 5685 5686 return self._parse_limit( 5687 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5688 ) 5689 5690 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5691 index = self._index 5692 if not self._match(TokenType.L_PAREN): 5693 return this 5694 5695 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5696 # expr can be of both types 5697 if self._match_set(self.SELECT_START_TOKENS): 5698 self._retreat(index) 5699 return this 5700 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5701 self._match_r_paren() 5702 return self.expression(exp.Schema, this=this, expressions=args) 5703 5704 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5705 return self._parse_column_def(self._parse_field(any_token=True)) 5706 5707 def _parse_column_def( 5708 self, this: t.Optional[exp.Expression], computed_column: bool = True 5709 ) -> t.Optional[exp.Expression]: 5710 # column defs are not really columns, they're identifiers 5711 if isinstance(this, exp.Column): 5712 this = this.this 5713 5714 if not computed_column: 5715 self._match(TokenType.ALIAS) 5716 5717 kind = self._parse_types(schema=True) 5718 5719 if self._match_text_seq("FOR", "ORDINALITY"): 5720 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5721 5722 constraints: t.List[exp.Expression] = [] 5723 5724 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5725 ("ALIAS", "MATERIALIZED") 5726 ): 5727 persisted = self._prev.text.upper() == "MATERIALIZED" 5728 constraint_kind = exp.ComputedColumnConstraint( 5729 this=self._parse_assignment(), 5730 persisted=persisted or self._match_text_seq("PERSISTED"), 5731 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5732 ) 5733 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5734 elif ( 5735 kind 5736 and self._match(TokenType.ALIAS, advance=False) 5737 and ( 5738 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5739 or (self._next and self._next.token_type == TokenType.L_PAREN) 5740 ) 5741 ): 5742 self._advance() 5743 constraints.append( 5744 self.expression( 5745 exp.ColumnConstraint, 5746 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5747 ) 5748 ) 5749 5750 while True: 5751 constraint = self._parse_column_constraint() 
5752 if not constraint: 5753 break 5754 constraints.append(constraint) 5755 5756 if not kind and not constraints: 5757 return this 5758 5759 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5760 5761 def _parse_auto_increment( 5762 self, 5763 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5764 start = None 5765 increment = None 5766 5767 if self._match(TokenType.L_PAREN, advance=False): 5768 args = self._parse_wrapped_csv(self._parse_bitwise) 5769 start = seq_get(args, 0) 5770 increment = seq_get(args, 1) 5771 elif self._match_text_seq("START"): 5772 start = self._parse_bitwise() 5773 self._match_text_seq("INCREMENT") 5774 increment = self._parse_bitwise() 5775 5776 if start and increment: 5777 return exp.GeneratedAsIdentityColumnConstraint( 5778 start=start, increment=increment, this=False 5779 ) 5780 5781 return exp.AutoIncrementColumnConstraint() 5782 5783 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5784 if not self._match_text_seq("REFRESH"): 5785 self._retreat(self._index - 1) 5786 return None 5787 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5788 5789 def _parse_compress(self) -> exp.CompressColumnConstraint: 5790 if self._match(TokenType.L_PAREN, advance=False): 5791 return self.expression( 5792 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5793 ) 5794 5795 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5796 5797 def _parse_generated_as_identity( 5798 self, 5799 ) -> ( 5800 exp.GeneratedAsIdentityColumnConstraint 5801 | exp.ComputedColumnConstraint 5802 | exp.GeneratedAsRowColumnConstraint 5803 ): 5804 if self._match_text_seq("BY", "DEFAULT"): 5805 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5806 this = self.expression( 5807 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5808 ) 5809 else: 5810 self._match_text_seq("ALWAYS") 
5811 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5812 5813 self._match(TokenType.ALIAS) 5814 5815 if self._match_text_seq("ROW"): 5816 start = self._match_text_seq("START") 5817 if not start: 5818 self._match(TokenType.END) 5819 hidden = self._match_text_seq("HIDDEN") 5820 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5821 5822 identity = self._match_text_seq("IDENTITY") 5823 5824 if self._match(TokenType.L_PAREN): 5825 if self._match(TokenType.START_WITH): 5826 this.set("start", self._parse_bitwise()) 5827 if self._match_text_seq("INCREMENT", "BY"): 5828 this.set("increment", self._parse_bitwise()) 5829 if self._match_text_seq("MINVALUE"): 5830 this.set("minvalue", self._parse_bitwise()) 5831 if self._match_text_seq("MAXVALUE"): 5832 this.set("maxvalue", self._parse_bitwise()) 5833 5834 if self._match_text_seq("CYCLE"): 5835 this.set("cycle", True) 5836 elif self._match_text_seq("NO", "CYCLE"): 5837 this.set("cycle", False) 5838 5839 if not identity: 5840 this.set("expression", self._parse_range()) 5841 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5842 args = self._parse_csv(self._parse_bitwise) 5843 this.set("start", seq_get(args, 0)) 5844 this.set("increment", seq_get(args, 1)) 5845 5846 self._match_r_paren() 5847 5848 return this 5849 5850 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5851 self._match_text_seq("LENGTH") 5852 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5853 5854 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5855 if self._match_text_seq("NULL"): 5856 return self.expression(exp.NotNullColumnConstraint) 5857 if self._match_text_seq("CASESPECIFIC"): 5858 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5859 if self._match_text_seq("FOR", "REPLICATION"): 5860 return self.expression(exp.NotForReplicationColumnConstraint) 5861 5862 # Unconsume the `NOT` 
token 5863 self._retreat(self._index - 1) 5864 return None 5865 5866 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5867 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5868 5869 procedure_option_follows = ( 5870 self._match(TokenType.WITH, advance=False) 5871 and self._next 5872 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5873 ) 5874 5875 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5876 return self.expression( 5877 exp.ColumnConstraint, 5878 this=this, 5879 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5880 ) 5881 5882 return this 5883 5884 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5885 if not self._match(TokenType.CONSTRAINT): 5886 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5887 5888 return self.expression( 5889 exp.Constraint, 5890 this=self._parse_id_var(), 5891 expressions=self._parse_unnamed_constraints(), 5892 ) 5893 5894 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5895 constraints = [] 5896 while True: 5897 constraint = self._parse_unnamed_constraint() or self._parse_function() 5898 if not constraint: 5899 break 5900 constraints.append(constraint) 5901 5902 return constraints 5903 5904 def _parse_unnamed_constraint( 5905 self, constraints: t.Optional[t.Collection[str]] = None 5906 ) -> t.Optional[exp.Expression]: 5907 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5908 constraints or self.CONSTRAINT_PARSERS 5909 ): 5910 return None 5911 5912 constraint = self._prev.text.upper() 5913 if constraint not in self.CONSTRAINT_PARSERS: 5914 self.raise_error(f"No parser found for schema constraint {constraint}.") 5915 5916 return self.CONSTRAINT_PARSERS[constraint](self) 5917 5918 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5919 return self._parse_id_var(any_token=False) 5920 5921 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5922 
        self._match_text_seq("KEY")
        # NOTE: keyword arguments are evaluated left to right and each one consumes
        # tokens, so the order below mirrors the grammar: NULLS NOT DISTINCT, the
        # column list, USING <index type>, then an ON CONFLICT clause.
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options into plain strings.

        Handles `ON <token> {NO ACTION | CASCADE | RESTRICT | SET NULL | SET DEFAULT}`
        plus any bare keywords found in KEY_CONSTRAINT_OPTIONS; stops at the first
        token that is neither.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON (e.g. DELETE/UPDATE) names the trigger event
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; when `match` is False the keyword is assumed consumed.

        `expressions` is intentionally left as None here -- the referenced column list,
        if present, is captured by `_parse_table(schema=True)` (presumably as part of
        the table's schema; the placeholder keeps the Reference node's shape stable).
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (<cols>) [REFERENCES ...] [ON DELETE|UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # Becomes the ForeignKey kwarg name: "delete" or "update"
            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-word action (e.g. CASCADE / RESTRICT)
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        # A key part may carry an ordering (ASC/DESC); fall back to a bare field
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (<start_col>, <end_col>), or retreat and return None."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY either as a column constraint or as a table-level key.

        Without a following parenthesized column list (and outside of properties),
        this is a column constraint; otherwise it is a table-level PRIMARY KEY.
        """
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        # One bracket element: an assignment, optionally aliased, optionally sliced
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        # The VAR token is the ODBC type marker (e.g. d / t / ts), already validated
        # by the caller against ODBC_DATETIME_LITERALS
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a bracket suffix: `[...]` (array literal or subscript) or `{...}` (struct/map).

        Recurses at the end to consume chained brackets, e.g. `x[1][2]`.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        # `{d'...'}`-style ODBC literals are handled separately
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            # Bare `[...]` with nothing preceding: an array literal
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            # `NAME[...]` where NAME is a registered constructor (e.g. ARRAY) builds
            # that constructor; otherwise this is a subscript access
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Normalize dialect-specific index bases onto a common offset
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` into a Slice when a `:` follows (subscript slicing syntax)."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional comparand for the "simple" CASE form; None for searched CASE
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # If ELSE was followed by a column named "interval", the END keyword was
            # swallowed as an interval unit; undo that instead of erroring out
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either function-style `IF(cond, t[, f])` or block-style IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            # Some dialects treat a leading bare IF as an opaque command
            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; retreat if not matched."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(<part> FROM <expr>), also accepting a comma as the separator."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL(TABLE <t>, <args...>), validating against GapFill's arg spec."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>] ...).

        `strict` chooses Cast vs TryCast; `safe` is forwarded to string-to-temporal
        conversions. A comma instead of AS yields CastToStrType; a FORMAT clause on a
        temporal target rewrites the cast into StrToDate/StrToTime.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        # DEFAULT <value> ON CONVERSION ERROR
        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Turn the formatted cast into an explicit string-to-temporal parse,
                # translating the format via the dialect's time format mappings
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unrecognized type name: treat as a user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        """Parse STRING_AGG / LISTAGG variants into a GroupConcat node."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        """Parse XMLTABLE([XMLNAMESPACES(...),] <row-expr> [PASSING ...] [COLUMNS ...])."""
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        """Parse the comma-separated namespace list inside XMLNAMESPACES(...)."""
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                # Non-default namespaces carry an alias: '<uri>' AS <prefix>
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired arg is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: DECODE treats NULL == NULL as a match,
                # so OR in an explicit both-NULL check
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one `[KEY] <key> <sep> [VALUE] <value>` pair for JSON constructors."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson when followed by FORMAT JSON."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        """Parse the ON EMPTY / ON ERROR / ON NULL handlers of JSON functions."""
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            # DEFAULT didn't belong to this clause; rewind
            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT(...) / JSON_OBJECTAGG(...); `agg` selects the node type."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        # Trailing KEYS keyword of WITH/WITHOUT UNIQUE KEYS is optional
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS clause."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (<json column defs>) schema clause."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse the interior of JSON_TABLE(<doc>[, <path>] [ON ERROR/EMPTY] COLUMNS ...)."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH(<cols>) AGAINST ('<query>' [<search modifier>])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse OPENJSON(<doc>[, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # <name> <type> ['<path>'] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments; `haystack_first` flips the arg order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(<substr> IN <string>)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT(MODEL <model>, TABLE <table>[, <params struct>])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint function, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR with no FROM: the start position defaults to 1
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        # Optional LEADING/TRAILING/BOTH-style position keyword
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<chars> FROM <string>) puts the pattern first; some dialects also
            # put the pattern first in the comma form (TRIM_PATTERN_FIRST)
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause: a comma-separated list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `<name> AS (<window spec>)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the corresponding keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing `HAVING MAX|MIN <column>` qualifier into HavingMax."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function suffix of `this`: WITHIN GROUP, FILTER,
        IGNORE/RESPECT NULLS and the OVER (...) specification. With `alias=True`,
        parse a named WINDOW-clause definition (`<name> AS (<spec>)`) instead.
        """
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the inner IGNORE/RESPECT NULLS so it wraps the whole aggregate
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        # (continuation of _parse_window) Oracle's FIRST/LAST KEEP syntax allows
        # another OVER clause to follow immediately; recurse with the window
        # built so far as the new function operand.
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY list and ORDER BY clause of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary (e.g. UNBOUNDED PRECEDING, CURRENT ROW, <expr> FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            # PRECEDING / FOLLOWING keyword, when present
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias (optionally requiring an explicit AS) for `this`.

        Returns `this` unchanged when no alias is found.
        """
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier; with `any_token`, any non-reserved token is accepted."""
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            # String tokens matched via `tokens` become quoted identifiers
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and wrap it in a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse an explicitly quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a Var from a VAR token, any non-reserved token, or one of `tokens`."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        """Parse a string literal, or failing that, a Var."""
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression, or failing that, a Var."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE literals, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter marker's name (identifier, primary, or var)."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token; rewinds if the registered parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Parser declined — put the consumed token back
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse star modifiers such as EXCEPT/REPLACE: KEYWORD (a, b, ...) or KEYWORD a."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`; None results are skipped."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments that trailed the separator to the previous item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators from `expressions` (token -> exp class) over operands."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a (possibly optional) parenthesized `sep`-separated list."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; parens may be omitted if `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated expression list."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or failing that an (optionally aliased) expression with set ops."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION | WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # Each mode is a run of VAR tokens, e.g. "ISOLATION LEVEL READ COMMITTED"
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT x] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): `chain` is parsed but only attached to COMMIT; a parsed
        # AND CHAIN on ROLLBACK is dropped — confirm this is intentional.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <table or string>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col] action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP [COLUMN] action inside ALTER TABLE, defaulting kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse DROP PARTITION (...) [, PARTITION (...) ...]."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD (constraints, columns, or a schema)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> <sub-action>."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        # Fallback: [SET DATA] [TYPE] <dtype> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse Redshift ALTER TABLE ... ALTER DISTSTYLE {ALL|EVEN|AUTO|KEY DISTKEY col}."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse Redshift ALTER TABLE ... ALTER [COMPOUND] SORTKEY {(cols)|AUTO|NONE}."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE ... DROP actions (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        """Parse ALTER TABLE ... RENAME [COLUMN old TO new | TO new_table]."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the many dialect-specific forms of ALTER TABLE ... SET."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Hive-style: [SERDE 'x'] followed by serde/table properties
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set
    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement, falling back to a raw Command when unsupported."""
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only produce an Alter node when the whole statement was consumed
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        """Parse an ANALYZE statement across several dialects."""
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        """Parse the COMPUTE/UPDATE STATISTICS portion of ANALYZE TABLE."""
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            # NOTE(review): `_match_texts` is given a plain string here, so the
            # membership test is substring-based (`"COL" in "COLUMNS"` is True) —
            # confirm a one-element tuple wasn't intended.
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )
    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        """Parse Oracle's ANALYZE ... VALIDATE {REF UPDATE | STRUCTURE ...} clause."""
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        """Parse e.g. ESTIMATE/COMPUTE COLUMNS; the leading keyword is in self._prev."""
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        """Parse DELETE [SYSTEM] STATISTICS."""
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        """Parse LIST CHAINED ROWS [INTO ...]."""
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        """Parse UPDATE/DROP HISTOGRAM ON cols [WITH ...] [USING DATA ...]."""
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON condition WHEN ... [RETURNING ...]."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
    def _parse_when_matched(self) -> exp.Whens:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] ... THEN ... clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source=False for BY TARGET, True for BY SOURCE, False when absent
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the registered SHOW parsers, else a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single SET item of the form `name = value` or `name TO value`."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via a registered parser or as a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; leftover tokens mean it wasn't fully understood."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Could not consume everything — rewind and fall back to a raw Command
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option from `options` into a Var.

        `options` maps a leading keyword to the sequences of keywords that may follow it.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched; an empty-tuple continuation means the
            # bare keyword alone is acceptable
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim into a generic Command node."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse ClickHouse dictionary properties: NAME(KIND(key value ...))."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary RANGE(MIN x MAX y) clause; MIN defaults to 0 when omitted."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        # NOTE: `min`/`max` shadow the builtins locally, kept for parity with the AST arg names
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse a DuckDB-style list comprehension tail: `expr FOR x IN it [IF cond]`."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Rewind past the FOR token consumed by the caller as well
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )
    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string: either a tokenized HEREDOC_STRING or $tag$...$tag$."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        # Scan forward until the opening tag sequence repeats
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the longest keyword-sequence match in `trie` and return its parser."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Match the current token against `token_type`; optionally consume it and
        attach any pending comments to `expression`. Returns True or None."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Match the current token against a collection of token types."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match the current and next tokens as a pair; consume both when `advance`."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `(`, raising a ParseError otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `)`, raising a ParseError otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Case-insensitively match the current token's text against `texts`
        (string literals are excluded from matching)."""
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of keyword texts; rewinds fully on any mismatch."""
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite lambda-parameter column references inside `node`, applying any
        declared parameter casts (`expressions` carry optional `to` types)."""
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                # Replace the outermost Dot chain the column participates in
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7810 7811 for column in node.find_all(exp.Column): 7812 typ = lambda_types.get(column.parts[0].name) 7813 if typ is not None: 7814 dot_or_id = column.to_dot() if column.table else column.this 7815 7816 if typ: 7817 dot_or_id = self.expression( 7818 exp.Cast, 7819 this=dot_or_id, 7820 to=typ, 7821 ) 7822 7823 parent = column.parent 7824 7825 while isinstance(parent, exp.Dot): 7826 if not isinstance(parent.parent, exp.Dot): 7827 parent.replace(dot_or_id) 7828 break 7829 parent = parent.parent 7830 else: 7831 if column is node: 7832 node = dot_or_id 7833 else: 7834 column.replace(dot_or_id) 7835 return node 7836 7837 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7838 start = self._prev 7839 7840 # Not to be confused with TRUNCATE(number, decimals) function call 7841 if self._match(TokenType.L_PAREN): 7842 self._retreat(self._index - 2) 7843 return self._parse_function() 7844 7845 # Clickhouse supports TRUNCATE DATABASE as well 7846 is_database = self._match(TokenType.DATABASE) 7847 7848 self._match(TokenType.TABLE) 7849 7850 exists = self._parse_exists(not_=False) 7851 7852 expressions = self._parse_csv( 7853 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7854 ) 7855 7856 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7857 7858 if self._match_text_seq("RESTART", "IDENTITY"): 7859 identity = "RESTART" 7860 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7861 identity = "CONTINUE" 7862 else: 7863 identity = None 7864 7865 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7866 option = self._prev.text 7867 else: 7868 option = None 7869 7870 partition = self._parse_partition() 7871 7872 # Fallback case 7873 if self._curr: 7874 return self._parse_as_command(start) 7875 7876 return self.expression( 7877 exp.TruncateTable, 7878 expressions=expressions, 7879 is_database=is_database, 7880 
exists=exists, 7881 cluster=cluster, 7882 identity=identity, 7883 option=option, 7884 partition=partition, 7885 ) 7886 7887 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7888 this = self._parse_ordered(self._parse_opclass) 7889 7890 if not self._match(TokenType.WITH): 7891 return this 7892 7893 op = self._parse_var(any_token=True) 7894 7895 return self.expression(exp.WithOperator, this=this, op=op) 7896 7897 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7898 self._match(TokenType.EQ) 7899 self._match(TokenType.L_PAREN) 7900 7901 opts: t.List[t.Optional[exp.Expression]] = [] 7902 while self._curr and not self._match(TokenType.R_PAREN): 7903 if self._match_text_seq("FORMAT_NAME", "="): 7904 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 7905 prop = self.expression( 7906 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_table_parts() 7907 ) 7908 opts.append(prop) 7909 else: 7910 opts.append(self._parse_property()) 7911 7912 self._match(TokenType.COMMA) 7913 7914 return opts 7915 7916 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7917 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7918 7919 options = [] 7920 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7921 option = self._parse_var(any_token=True) 7922 prev = self._prev.text.upper() 7923 7924 # Different dialects might separate options and values by white space, "=" and "AS" 7925 self._match(TokenType.EQ) 7926 self._match(TokenType.ALIAS) 7927 7928 param = self.expression(exp.CopyParameter, this=option) 7929 7930 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7931 TokenType.L_PAREN, advance=False 7932 ): 7933 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7934 param.set("expressions", self._parse_wrapped_options()) 7935 elif prev == "FILE_FORMAT": 7936 # T-SQL's external file format case 7937 param.set("expression", self._parse_field()) 7938 else: 7939 
param.set("expression", self._parse_unquoted_field()) 7940 7941 options.append(param) 7942 self._match(sep) 7943 7944 return options 7945 7946 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7947 expr = self.expression(exp.Credentials) 7948 7949 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7950 expr.set("storage", self._parse_field()) 7951 if self._match_text_seq("CREDENTIALS"): 7952 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7953 creds = ( 7954 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7955 ) 7956 expr.set("credentials", creds) 7957 if self._match_text_seq("ENCRYPTION"): 7958 expr.set("encryption", self._parse_wrapped_options()) 7959 if self._match_text_seq("IAM_ROLE"): 7960 expr.set("iam_role", self._parse_field()) 7961 if self._match_text_seq("REGION"): 7962 expr.set("region", self._parse_field()) 7963 7964 return expr 7965 7966 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7967 return self._parse_field() 7968 7969 def _parse_copy(self) -> exp.Copy | exp.Command: 7970 start = self._prev 7971 7972 self._match(TokenType.INTO) 7973 7974 this = ( 7975 self._parse_select(nested=True, parse_subquery_alias=False) 7976 if self._match(TokenType.L_PAREN, advance=False) 7977 else self._parse_table(schema=True) 7978 ) 7979 7980 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7981 7982 files = self._parse_csv(self._parse_file_location) 7983 credentials = self._parse_credentials() 7984 7985 self._match_text_seq("WITH") 7986 7987 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7988 7989 # Fallback case 7990 if self._curr: 7991 return self._parse_as_command(start) 7992 7993 return self.expression( 7994 exp.Copy, 7995 this=this, 7996 kind=kind, 7997 credentials=credentials, 7998 files=files, 7999 params=params, 8000 ) 8001 8002 def _parse_normalize(self) -> exp.Normalize: 8003 return self.expression( 8004 exp.Normalize, 8005 
this=self._parse_bitwise(), 8006 form=self._match(TokenType.COMMA) and self._parse_var(), 8007 ) 8008 8009 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8010 args = self._parse_csv(lambda: self._parse_lambda()) 8011 8012 this = seq_get(args, 0) 8013 decimals = seq_get(args, 1) 8014 8015 return expr_type( 8016 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8017 ) 8018 8019 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8020 if self._match_text_seq("COLUMNS", "(", advance=False): 8021 this = self._parse_function() 8022 if isinstance(this, exp.Columns): 8023 this.set("unpack", True) 8024 return this 8025 8026 return self.expression( 8027 exp.Star, 8028 **{ # type: ignore 8029 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8030 "replace": self._parse_star_op("REPLACE"), 8031 "rename": self._parse_star_op("RENAME"), 8032 }, 8033 ) 8034 8035 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8036 privilege_parts = [] 8037 8038 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8039 # (end of privilege list) or L_PAREN (start of column list) are met 8040 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8041 privilege_parts.append(self._curr.text.upper()) 8042 self._advance() 8043 8044 this = exp.var(" ".join(privilege_parts)) 8045 expressions = ( 8046 self._parse_wrapped_csv(self._parse_column) 8047 if self._match(TokenType.L_PAREN, advance=False) 8048 else None 8049 ) 8050 8051 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8052 8053 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8054 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8055 principal = self._parse_id_var() 8056 8057 if not principal: 8058 return None 8059 8060 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8061 8062 def _parse_grant(self) -> exp.Grant 
| exp.Command: 8063 start = self._prev 8064 8065 privileges = self._parse_csv(self._parse_grant_privilege) 8066 8067 self._match(TokenType.ON) 8068 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8069 8070 # Attempt to parse the securable e.g. MySQL allows names 8071 # such as "foo.*", "*.*" which are not easily parseable yet 8072 securable = self._try_parse(self._parse_table_parts) 8073 8074 if not securable or not self._match_text_seq("TO"): 8075 return self._parse_as_command(start) 8076 8077 principals = self._parse_csv(self._parse_grant_principal) 8078 8079 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8080 8081 if self._curr: 8082 return self._parse_as_command(start) 8083 8084 return self.expression( 8085 exp.Grant, 8086 privileges=privileges, 8087 kind=kind, 8088 securable=securable, 8089 principals=principals, 8090 grant_option=grant_option, 8091 ) 8092 8093 def _parse_overlay(self) -> exp.Overlay: 8094 return self.expression( 8095 exp.Overlay, 8096 **{ # type: ignore 8097 "this": self._parse_bitwise(), 8098 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8099 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8100 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8101 }, 8102 )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from alternating key/value arguments.

    A single star argument yields a StarMap; otherwise even positions are keys
    and odd positions are the corresponding values of a VarMap.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Pair up (key, value) from consecutive arguments.
    pairs = [(args[i], args[i + 1]) for i in range(0, len(args), 2)]
    keys = [key for key, _ in pairs]
    values = [value for _, value in pairs]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback that builds a binary range expression of ``expr_type``.

    The callback reads the right-hand operand with ``_parse_bitwise`` and wraps the
    result in an optional ESCAPE clause. With ``reverse_args`` the operand order is
    flipped before constructing the expression.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        rhs = self._parse_bitwise()
        left, right = (rhs, this) if reverse_args else (this, rhs)
        return self._parse_escape(self.expression(expr_type, this=left, expression=right))

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a LOG expression, honoring the dialect's argument order and defaults."""
    # Default argument order is base, expression
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if not value:
        # Single-argument LOG: some dialects define it as the natural logarithm
        if dialect.parser_class.LOG_DEFAULTS_TO_LN:
            return exp.Ln(this=base)
        return exp.Log(this=base)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=base, expression=value)

    # Dialect puts the value first and the base second: swap the operands
    return exp.Log(this=value, expression=base)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder that constructs ``expr_type`` with a dialect-converted JSON path."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=seq_get(args, 0), expression=path)

        # JSON_EXTRACT may accept extra path arguments beyond the first two
        extra = args[2:]
        if extra and expr_type is exp.JSONExtract:
            node.set("expressions", extra)

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a MOD expression, parenthesizing binary operands to preserve precedence."""
    dividend = seq_get(args, 0)
    divisor = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    if isinstance(dividend, exp.Binary):
        dividend = exp.Paren(this=dividend)
    if isinstance(divisor, exp.Binary):
        divisor = exp.Paren(this=divisor)

    return exp.Mod(this=dividend, expression=divisor)
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array/list constructor node from parsed elements.

    For dialects that distinguish ``ARRAY[...]`` from ``ARRAY(...)``, the bracket
    notation used by the source SQL is recorded on the node.
    """
    constructed = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        constructed.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return constructed
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build CONVERT_TIMEZONE; a two-argument call uses ``default_source_tz`` as source."""
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    # Two-argument form: (target_tz, timestamp) with an implicit source timezone
    source_tz = None
    if default_source_tz:
        source_tz = exp.Literal.string(default_source_tz)

    return exp.ConvertTimezone(
        source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )
175class Parser(metaclass=_Parser): 176 """ 177 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 178 179 Args: 180 error_level: The desired error level. 181 Default: ErrorLevel.IMMEDIATE 182 error_message_context: The amount of context to capture from a query string when displaying 183 the error message (in number of characters). 184 Default: 100 185 max_errors: Maximum number of error messages to include in a raised ParseError. 186 This is only relevant if error_level is ErrorLevel.RAISE. 187 Default: 3 188 """ 189 190 FUNCTIONS: t.Dict[str, t.Callable] = { 191 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 192 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 193 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 194 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 195 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 196 ), 197 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "CHAR": lambda args: exp.Chr(expressions=args), 201 "CHR": lambda args: exp.Chr(expressions=args), 202 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 203 "CONCAT": lambda args, dialect: exp.Concat( 204 expressions=args, 205 safe=not dialect.STRICT_STRING_CONCAT, 206 coalesce=dialect.CONCAT_COALESCE, 207 ), 208 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 209 expressions=args, 210 safe=not dialect.STRICT_STRING_CONCAT, 211 coalesce=dialect.CONCAT_COALESCE, 212 ), 213 "CONVERT_TIMEZONE": build_convert_timezone, 214 "DATE_TO_DATE_STR": lambda args: exp.Cast( 215 this=seq_get(args, 0), 216 to=exp.DataType(this=exp.DataType.Type.TEXT), 217 ), 218 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 219 start=seq_get(args, 0), 220 end=seq_get(args, 1), 221 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 222 ), 223 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 224 "HEX": build_hex, 225 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 226 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 227 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 228 "LIKE": build_like, 229 "LOG": build_logarithm, 230 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 231 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 232 "LOWER": build_lower, 233 "LPAD": lambda args: build_pad(args), 234 "LEFTPAD": lambda args: build_pad(args), 235 "LTRIM": lambda args: build_trim(args), 236 "MOD": build_mod, 237 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 238 "RPAD": lambda args: build_pad(args, is_left=False), 239 "RTRIM": lambda args: build_trim(args, is_left=False), 240 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 241 if len(args) != 2 242 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 243 "STRPOS": exp.StrPosition.from_arg_list, 244 "CHARINDEX": lambda args: build_locate_strposition(args), 245 "INSTR": exp.StrPosition.from_arg_list, 246 "LOCATE": lambda args: build_locate_strposition(args), 247 "TIME_TO_TIME_STR": lambda args: exp.Cast( 248 this=seq_get(args, 0), 249 to=exp.DataType(this=exp.DataType.Type.TEXT), 250 ), 251 "TO_HEX": build_hex, 252 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 253 this=exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 start=exp.Literal.number(1), 258 length=exp.Literal.number(10), 259 ), 260 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 261 "UPPER": build_upper, 262 "VAR_MAP": build_var_map, 263 } 264 265 NO_PAREN_FUNCTIONS = { 266 TokenType.CURRENT_DATE: 
exp.CurrentDate, 267 TokenType.CURRENT_DATETIME: exp.CurrentDate, 268 TokenType.CURRENT_TIME: exp.CurrentTime, 269 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 270 TokenType.CURRENT_USER: exp.CurrentUser, 271 } 272 273 STRUCT_TYPE_TOKENS = { 274 TokenType.NESTED, 275 TokenType.OBJECT, 276 TokenType.STRUCT, 277 TokenType.UNION, 278 } 279 280 NESTED_TYPE_TOKENS = { 281 TokenType.ARRAY, 282 TokenType.LIST, 283 TokenType.LOWCARDINALITY, 284 TokenType.MAP, 285 TokenType.NULLABLE, 286 TokenType.RANGE, 287 *STRUCT_TYPE_TOKENS, 288 } 289 290 ENUM_TYPE_TOKENS = { 291 TokenType.DYNAMIC, 292 TokenType.ENUM, 293 TokenType.ENUM8, 294 TokenType.ENUM16, 295 } 296 297 AGGREGATE_TYPE_TOKENS = { 298 TokenType.AGGREGATEFUNCTION, 299 TokenType.SIMPLEAGGREGATEFUNCTION, 300 } 301 302 TYPE_TOKENS = { 303 TokenType.BIT, 304 TokenType.BOOLEAN, 305 TokenType.TINYINT, 306 TokenType.UTINYINT, 307 TokenType.SMALLINT, 308 TokenType.USMALLINT, 309 TokenType.INT, 310 TokenType.UINT, 311 TokenType.BIGINT, 312 TokenType.UBIGINT, 313 TokenType.INT128, 314 TokenType.UINT128, 315 TokenType.INT256, 316 TokenType.UINT256, 317 TokenType.MEDIUMINT, 318 TokenType.UMEDIUMINT, 319 TokenType.FIXEDSTRING, 320 TokenType.FLOAT, 321 TokenType.DOUBLE, 322 TokenType.UDOUBLE, 323 TokenType.CHAR, 324 TokenType.NCHAR, 325 TokenType.VARCHAR, 326 TokenType.NVARCHAR, 327 TokenType.BPCHAR, 328 TokenType.TEXT, 329 TokenType.MEDIUMTEXT, 330 TokenType.LONGTEXT, 331 TokenType.BLOB, 332 TokenType.MEDIUMBLOB, 333 TokenType.LONGBLOB, 334 TokenType.BINARY, 335 TokenType.VARBINARY, 336 TokenType.JSON, 337 TokenType.JSONB, 338 TokenType.INTERVAL, 339 TokenType.TINYBLOB, 340 TokenType.TINYTEXT, 341 TokenType.TIME, 342 TokenType.TIMETZ, 343 TokenType.TIMESTAMP, 344 TokenType.TIMESTAMP_S, 345 TokenType.TIMESTAMP_MS, 346 TokenType.TIMESTAMP_NS, 347 TokenType.TIMESTAMPTZ, 348 TokenType.TIMESTAMPLTZ, 349 TokenType.TIMESTAMPNTZ, 350 TokenType.DATETIME, 351 TokenType.DATETIME2, 352 TokenType.DATETIME64, 353 TokenType.SMALLDATETIME, 
354 TokenType.DATE, 355 TokenType.DATE32, 356 TokenType.INT4RANGE, 357 TokenType.INT4MULTIRANGE, 358 TokenType.INT8RANGE, 359 TokenType.INT8MULTIRANGE, 360 TokenType.NUMRANGE, 361 TokenType.NUMMULTIRANGE, 362 TokenType.TSRANGE, 363 TokenType.TSMULTIRANGE, 364 TokenType.TSTZRANGE, 365 TokenType.TSTZMULTIRANGE, 366 TokenType.DATERANGE, 367 TokenType.DATEMULTIRANGE, 368 TokenType.DECIMAL, 369 TokenType.DECIMAL32, 370 TokenType.DECIMAL64, 371 TokenType.DECIMAL128, 372 TokenType.DECIMAL256, 373 TokenType.UDECIMAL, 374 TokenType.BIGDECIMAL, 375 TokenType.UUID, 376 TokenType.GEOGRAPHY, 377 TokenType.GEOMETRY, 378 TokenType.POINT, 379 TokenType.RING, 380 TokenType.LINESTRING, 381 TokenType.MULTILINESTRING, 382 TokenType.POLYGON, 383 TokenType.MULTIPOLYGON, 384 TokenType.HLLSKETCH, 385 TokenType.HSTORE, 386 TokenType.PSEUDO_TYPE, 387 TokenType.SUPER, 388 TokenType.SERIAL, 389 TokenType.SMALLSERIAL, 390 TokenType.BIGSERIAL, 391 TokenType.XML, 392 TokenType.YEAR, 393 TokenType.USERDEFINED, 394 TokenType.MONEY, 395 TokenType.SMALLMONEY, 396 TokenType.ROWVERSION, 397 TokenType.IMAGE, 398 TokenType.VARIANT, 399 TokenType.VECTOR, 400 TokenType.OBJECT, 401 TokenType.OBJECT_IDENTIFIER, 402 TokenType.INET, 403 TokenType.IPADDRESS, 404 TokenType.IPPREFIX, 405 TokenType.IPV4, 406 TokenType.IPV6, 407 TokenType.UNKNOWN, 408 TokenType.NULL, 409 TokenType.NAME, 410 TokenType.TDIGEST, 411 TokenType.DYNAMIC, 412 *ENUM_TYPE_TOKENS, 413 *NESTED_TYPE_TOKENS, 414 *AGGREGATE_TYPE_TOKENS, 415 } 416 417 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 418 TokenType.BIGINT: TokenType.UBIGINT, 419 TokenType.INT: TokenType.UINT, 420 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 421 TokenType.SMALLINT: TokenType.USMALLINT, 422 TokenType.TINYINT: TokenType.UTINYINT, 423 TokenType.DECIMAL: TokenType.UDECIMAL, 424 TokenType.DOUBLE: TokenType.UDOUBLE, 425 } 426 427 SUBQUERY_PREDICATES = { 428 TokenType.ANY: exp.Any, 429 TokenType.ALL: exp.All, 430 TokenType.EXISTS: exp.Exists, 431 TokenType.SOME: exp.Any, 432 } 433 434 
RESERVED_TOKENS = { 435 *Tokenizer.SINGLE_TOKENS.values(), 436 TokenType.SELECT, 437 } - {TokenType.IDENTIFIER} 438 439 DB_CREATABLES = { 440 TokenType.DATABASE, 441 TokenType.DICTIONARY, 442 TokenType.MODEL, 443 TokenType.NAMESPACE, 444 TokenType.SCHEMA, 445 TokenType.SEQUENCE, 446 TokenType.SINK, 447 TokenType.SOURCE, 448 TokenType.STORAGE_INTEGRATION, 449 TokenType.STREAMLIT, 450 TokenType.TABLE, 451 TokenType.TAG, 452 TokenType.VIEW, 453 TokenType.WAREHOUSE, 454 } 455 456 CREATABLES = { 457 TokenType.COLUMN, 458 TokenType.CONSTRAINT, 459 TokenType.FOREIGN_KEY, 460 TokenType.FUNCTION, 461 TokenType.INDEX, 462 TokenType.PROCEDURE, 463 *DB_CREATABLES, 464 } 465 466 ALTERABLES = { 467 TokenType.INDEX, 468 TokenType.TABLE, 469 TokenType.VIEW, 470 } 471 472 # Tokens that can represent identifiers 473 ID_VAR_TOKENS = { 474 TokenType.ALL, 475 TokenType.ATTACH, 476 TokenType.VAR, 477 TokenType.ANTI, 478 TokenType.APPLY, 479 TokenType.ASC, 480 TokenType.ASOF, 481 TokenType.AUTO_INCREMENT, 482 TokenType.BEGIN, 483 TokenType.BPCHAR, 484 TokenType.CACHE, 485 TokenType.CASE, 486 TokenType.COLLATE, 487 TokenType.COMMAND, 488 TokenType.COMMENT, 489 TokenType.COMMIT, 490 TokenType.CONSTRAINT, 491 TokenType.COPY, 492 TokenType.CUBE, 493 TokenType.CURRENT_SCHEMA, 494 TokenType.DEFAULT, 495 TokenType.DELETE, 496 TokenType.DESC, 497 TokenType.DESCRIBE, 498 TokenType.DETACH, 499 TokenType.DICTIONARY, 500 TokenType.DIV, 501 TokenType.END, 502 TokenType.EXECUTE, 503 TokenType.EXPORT, 504 TokenType.ESCAPE, 505 TokenType.FALSE, 506 TokenType.FIRST, 507 TokenType.FILTER, 508 TokenType.FINAL, 509 TokenType.FORMAT, 510 TokenType.FULL, 511 TokenType.IDENTIFIER, 512 TokenType.IS, 513 TokenType.ISNULL, 514 TokenType.INTERVAL, 515 TokenType.KEEP, 516 TokenType.KILL, 517 TokenType.LEFT, 518 TokenType.LIMIT, 519 TokenType.LOAD, 520 TokenType.MERGE, 521 TokenType.NATURAL, 522 TokenType.NEXT, 523 TokenType.OFFSET, 524 TokenType.OPERATOR, 525 TokenType.ORDINALITY, 526 TokenType.OVERLAPS, 527 
TokenType.OVERWRITE, 528 TokenType.PARTITION, 529 TokenType.PERCENT, 530 TokenType.PIVOT, 531 TokenType.PRAGMA, 532 TokenType.PUT, 533 TokenType.RANGE, 534 TokenType.RECURSIVE, 535 TokenType.REFERENCES, 536 TokenType.REFRESH, 537 TokenType.RENAME, 538 TokenType.REPLACE, 539 TokenType.RIGHT, 540 TokenType.ROLLUP, 541 TokenType.ROW, 542 TokenType.ROWS, 543 TokenType.SEMI, 544 TokenType.SET, 545 TokenType.SETTINGS, 546 TokenType.SHOW, 547 TokenType.TEMPORARY, 548 TokenType.TOP, 549 TokenType.TRUE, 550 TokenType.TRUNCATE, 551 TokenType.UNIQUE, 552 TokenType.UNNEST, 553 TokenType.UNPIVOT, 554 TokenType.UPDATE, 555 TokenType.USE, 556 TokenType.VOLATILE, 557 TokenType.WINDOW, 558 *CREATABLES, 559 *SUBQUERY_PREDICATES, 560 *TYPE_TOKENS, 561 *NO_PAREN_FUNCTIONS, 562 } 563 ID_VAR_TOKENS.remove(TokenType.UNION) 564 565 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 566 TokenType.ANTI, 567 TokenType.APPLY, 568 TokenType.ASOF, 569 TokenType.FULL, 570 TokenType.LEFT, 571 TokenType.LOCK, 572 TokenType.NATURAL, 573 TokenType.RIGHT, 574 TokenType.SEMI, 575 TokenType.WINDOW, 576 } 577 578 ALIAS_TOKENS = ID_VAR_TOKENS 579 580 ARRAY_CONSTRUCTORS = { 581 "ARRAY": exp.Array, 582 "LIST": exp.List, 583 } 584 585 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 586 587 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 588 589 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 590 591 FUNC_TOKENS = { 592 TokenType.COLLATE, 593 TokenType.COMMAND, 594 TokenType.CURRENT_DATE, 595 TokenType.CURRENT_DATETIME, 596 TokenType.CURRENT_SCHEMA, 597 TokenType.CURRENT_TIMESTAMP, 598 TokenType.CURRENT_TIME, 599 TokenType.CURRENT_USER, 600 TokenType.FILTER, 601 TokenType.FIRST, 602 TokenType.FORMAT, 603 TokenType.GLOB, 604 TokenType.IDENTIFIER, 605 TokenType.INDEX, 606 TokenType.ISNULL, 607 TokenType.ILIKE, 608 TokenType.INSERT, 609 TokenType.LIKE, 610 TokenType.MERGE, 611 TokenType.NEXT, 612 TokenType.OFFSET, 613 TokenType.PRIMARY_KEY, 614 TokenType.RANGE, 615 TokenType.REPLACE, 616 
TokenType.RLIKE, 617 TokenType.ROW, 618 TokenType.UNNEST, 619 TokenType.VAR, 620 TokenType.LEFT, 621 TokenType.RIGHT, 622 TokenType.SEQUENCE, 623 TokenType.DATE, 624 TokenType.DATETIME, 625 TokenType.TABLE, 626 TokenType.TIMESTAMP, 627 TokenType.TIMESTAMPTZ, 628 TokenType.TRUNCATE, 629 TokenType.WINDOW, 630 TokenType.XOR, 631 *TYPE_TOKENS, 632 *SUBQUERY_PREDICATES, 633 } 634 635 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 636 TokenType.AND: exp.And, 637 } 638 639 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 640 TokenType.COLON_EQ: exp.PropertyEQ, 641 } 642 643 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 644 TokenType.OR: exp.Or, 645 } 646 647 EQUALITY = { 648 TokenType.EQ: exp.EQ, 649 TokenType.NEQ: exp.NEQ, 650 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 651 } 652 653 COMPARISON = { 654 TokenType.GT: exp.GT, 655 TokenType.GTE: exp.GTE, 656 TokenType.LT: exp.LT, 657 TokenType.LTE: exp.LTE, 658 } 659 660 BITWISE = { 661 TokenType.AMP: exp.BitwiseAnd, 662 TokenType.CARET: exp.BitwiseXor, 663 TokenType.PIPE: exp.BitwiseOr, 664 } 665 666 TERM = { 667 TokenType.DASH: exp.Sub, 668 TokenType.PLUS: exp.Add, 669 TokenType.MOD: exp.Mod, 670 TokenType.COLLATE: exp.Collate, 671 } 672 673 FACTOR = { 674 TokenType.DIV: exp.IntDiv, 675 TokenType.LR_ARROW: exp.Distance, 676 TokenType.SLASH: exp.Div, 677 TokenType.STAR: exp.Mul, 678 } 679 680 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 681 682 TIMES = { 683 TokenType.TIME, 684 TokenType.TIMETZ, 685 } 686 687 TIMESTAMPS = { 688 TokenType.TIMESTAMP, 689 TokenType.TIMESTAMPNTZ, 690 TokenType.TIMESTAMPTZ, 691 TokenType.TIMESTAMPLTZ, 692 *TIMES, 693 } 694 695 SET_OPERATIONS = { 696 TokenType.UNION, 697 TokenType.INTERSECT, 698 TokenType.EXCEPT, 699 } 700 701 JOIN_METHODS = { 702 TokenType.ASOF, 703 TokenType.NATURAL, 704 TokenType.POSITIONAL, 705 } 706 707 JOIN_SIDES = { 708 TokenType.LEFT, 709 TokenType.RIGHT, 710 TokenType.FULL, 711 } 712 713 JOIN_KINDS = { 714 TokenType.ANTI, 715 
TokenType.CROSS, 716 TokenType.INNER, 717 TokenType.OUTER, 718 TokenType.SEMI, 719 TokenType.STRAIGHT_JOIN, 720 } 721 722 JOIN_HINTS: t.Set[str] = set() 723 724 LAMBDAS = { 725 TokenType.ARROW: lambda self, expressions: self.expression( 726 exp.Lambda, 727 this=self._replace_lambda( 728 self._parse_assignment(), 729 expressions, 730 ), 731 expressions=expressions, 732 ), 733 TokenType.FARROW: lambda self, expressions: self.expression( 734 exp.Kwarg, 735 this=exp.var(expressions[0].name), 736 expression=self._parse_assignment(), 737 ), 738 } 739 740 COLUMN_OPERATORS = { 741 TokenType.DOT: None, 742 TokenType.DOTCOLON: lambda self, this, to: self.expression( 743 exp.JSONCast, 744 this=this, 745 to=to, 746 ), 747 TokenType.DCOLON: lambda self, this, to: self.expression( 748 exp.Cast if self.STRICT_CAST else exp.TryCast, 749 this=this, 750 to=to, 751 ), 752 TokenType.ARROW: lambda self, this, path: self.expression( 753 exp.JSONExtract, 754 this=this, 755 expression=self.dialect.to_json_path(path), 756 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 757 ), 758 TokenType.DARROW: lambda self, this, path: self.expression( 759 exp.JSONExtractScalar, 760 this=this, 761 expression=self.dialect.to_json_path(path), 762 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 763 ), 764 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 765 exp.JSONBExtract, 766 this=this, 767 expression=path, 768 ), 769 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 770 exp.JSONBExtractScalar, 771 this=this, 772 expression=path, 773 ), 774 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 775 exp.JSONBContains, 776 this=this, 777 expression=key, 778 ), 779 } 780 781 EXPRESSION_PARSERS = { 782 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 783 exp.Column: lambda self: self._parse_column(), 784 exp.Condition: lambda self: self._parse_assignment(), 785 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, 
schema=True), 786 exp.Expression: lambda self: self._parse_expression(), 787 exp.From: lambda self: self._parse_from(joins=True), 788 exp.Group: lambda self: self._parse_group(), 789 exp.Having: lambda self: self._parse_having(), 790 exp.Hint: lambda self: self._parse_hint_body(), 791 exp.Identifier: lambda self: self._parse_id_var(), 792 exp.Join: lambda self: self._parse_join(), 793 exp.Lambda: lambda self: self._parse_lambda(), 794 exp.Lateral: lambda self: self._parse_lateral(), 795 exp.Limit: lambda self: self._parse_limit(), 796 exp.Offset: lambda self: self._parse_offset(), 797 exp.Order: lambda self: self._parse_order(), 798 exp.Ordered: lambda self: self._parse_ordered(), 799 exp.Properties: lambda self: self._parse_properties(), 800 exp.Qualify: lambda self: self._parse_qualify(), 801 exp.Returning: lambda self: self._parse_returning(), 802 exp.Select: lambda self: self._parse_select(), 803 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 804 exp.Table: lambda self: self._parse_table_parts(), 805 exp.TableAlias: lambda self: self._parse_table_alias(), 806 exp.Tuple: lambda self: self._parse_value(), 807 exp.Whens: lambda self: self._parse_when_matched(), 808 exp.Where: lambda self: self._parse_where(), 809 exp.Window: lambda self: self._parse_named_window(), 810 exp.With: lambda self: self._parse_with(), 811 "JOIN_TYPE": lambda self: self._parse_join_parts(), 812 } 813 814 STATEMENT_PARSERS = { 815 TokenType.ALTER: lambda self: self._parse_alter(), 816 TokenType.ANALYZE: lambda self: self._parse_analyze(), 817 TokenType.BEGIN: lambda self: self._parse_transaction(), 818 TokenType.CACHE: lambda self: self._parse_cache(), 819 TokenType.COMMENT: lambda self: self._parse_comment(), 820 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 821 TokenType.COPY: lambda self: self._parse_copy(), 822 TokenType.CREATE: lambda self: self._parse_create(), 823 TokenType.DELETE: lambda self: self._parse_delete(), 824 TokenType.DESC: 
lambda self: self._parse_describe(), 825 TokenType.DESCRIBE: lambda self: self._parse_describe(), 826 TokenType.DROP: lambda self: self._parse_drop(), 827 TokenType.GRANT: lambda self: self._parse_grant(), 828 TokenType.INSERT: lambda self: self._parse_insert(), 829 TokenType.KILL: lambda self: self._parse_kill(), 830 TokenType.LOAD: lambda self: self._parse_load(), 831 TokenType.MERGE: lambda self: self._parse_merge(), 832 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 833 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 834 TokenType.REFRESH: lambda self: self._parse_refresh(), 835 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 836 TokenType.SET: lambda self: self._parse_set(), 837 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 838 TokenType.UNCACHE: lambda self: self._parse_uncache(), 839 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 840 TokenType.UPDATE: lambda self: self._parse_update(), 841 TokenType.USE: lambda self: self._parse_use(), 842 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 843 } 844 845 UNARY_PARSERS = { 846 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 847 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 848 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 849 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 850 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 851 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 852 } 853 854 STRING_PARSERS = { 855 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 856 exp.RawString, this=token.text 857 ), 858 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 859 exp.National, this=token.text 860 ), 861 TokenType.RAW_STRING: lambda self, 
token: self.expression(exp.RawString, this=token.text), 862 TokenType.STRING: lambda self, token: self.expression( 863 exp.Literal, this=token.text, is_string=True 864 ), 865 TokenType.UNICODE_STRING: lambda self, token: self.expression( 866 exp.UnicodeString, 867 this=token.text, 868 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 869 ), 870 } 871 872 NUMERIC_PARSERS = { 873 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 874 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 875 TokenType.HEX_STRING: lambda self, token: self.expression( 876 exp.HexString, 877 this=token.text, 878 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 879 ), 880 TokenType.NUMBER: lambda self, token: self.expression( 881 exp.Literal, this=token.text, is_string=False 882 ), 883 } 884 885 PRIMARY_PARSERS = { 886 **STRING_PARSERS, 887 **NUMERIC_PARSERS, 888 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 889 TokenType.NULL: lambda self, _: self.expression(exp.Null), 890 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 891 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 892 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 893 TokenType.STAR: lambda self, _: self._parse_star_ops(), 894 } 895 896 PLACEHOLDER_PARSERS = { 897 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 898 TokenType.PARAMETER: lambda self: self._parse_parameter(), 899 TokenType.COLON: lambda self: ( 900 self.expression(exp.Placeholder, this=self._prev.text) 901 if self._match_set(self.ID_VAR_TOKENS) 902 else None 903 ), 904 } 905 906 RANGE_PARSERS = { 907 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 908 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 909 TokenType.GLOB: binary_range_parser(exp.Glob), 910 TokenType.ILIKE: binary_range_parser(exp.ILike), 911 
TokenType.IN: lambda self, this: self._parse_in(this), 912 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 913 TokenType.IS: lambda self, this: self._parse_is(this), 914 TokenType.LIKE: binary_range_parser(exp.Like), 915 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 916 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 917 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 918 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 919 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 920 } 921 922 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 923 "ALLOWED_VALUES": lambda self: self.expression( 924 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 925 ), 926 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 927 "AUTO": lambda self: self._parse_auto_property(), 928 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 929 "BACKUP": lambda self: self.expression( 930 exp.BackupProperty, this=self._parse_var(any_token=True) 931 ), 932 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 933 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 934 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 935 "CHECKSUM": lambda self: self._parse_checksum(), 936 "CLUSTER BY": lambda self: self._parse_cluster(), 937 "CLUSTERED": lambda self: self._parse_clustered_by(), 938 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 939 exp.CollateProperty, **kwargs 940 ), 941 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 942 "CONTAINS": lambda self: self._parse_contains_property(), 943 "COPY": lambda self: self._parse_copy_property(), 944 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 945 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 946 "DEFINER": lambda self: 
self._parse_definer(), 947 "DETERMINISTIC": lambda self: self.expression( 948 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 949 ), 950 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 951 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 952 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 953 "DISTKEY": lambda self: self._parse_distkey(), 954 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 955 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 956 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 957 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 958 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 959 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 960 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 961 "FREESPACE": lambda self: self._parse_freespace(), 962 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 963 "HEAP": lambda self: self.expression(exp.HeapProperty), 964 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 965 "IMMUTABLE": lambda self: self.expression( 966 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 967 ), 968 "INHERITS": lambda self: self.expression( 969 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 970 ), 971 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 972 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 973 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 974 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 975 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 976 "LIKE": lambda self: self._parse_create_like(), 977 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 978 "LOCK": lambda 
self: self._parse_locking(), 979 "LOCKING": lambda self: self._parse_locking(), 980 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 981 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 982 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 983 "MODIFIES": lambda self: self._parse_modifies_property(), 984 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 985 "NO": lambda self: self._parse_no_property(), 986 "ON": lambda self: self._parse_on_property(), 987 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 988 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 989 "PARTITION": lambda self: self._parse_partitioned_of(), 990 "PARTITION BY": lambda self: self._parse_partitioned_by(), 991 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 992 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 993 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 994 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 995 "READS": lambda self: self._parse_reads_property(), 996 "REMOTE": lambda self: self._parse_remote_with_connection(), 997 "RETURNS": lambda self: self._parse_returns(), 998 "STRICT": lambda self: self.expression(exp.StrictProperty), 999 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1000 "ROW": lambda self: self._parse_row(), 1001 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1002 "SAMPLE": lambda self: self.expression( 1003 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1004 ), 1005 "SECURE": lambda self: self.expression(exp.SecureProperty), 1006 "SECURITY": lambda self: self._parse_security(), 1007 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1008 "SETTINGS": lambda self: self._parse_settings_property(), 1009 "SHARING": lambda self: 
self._parse_property_assignment(exp.SharingProperty), 1010 "SORTKEY": lambda self: self._parse_sortkey(), 1011 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1012 "STABLE": lambda self: self.expression( 1013 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1014 ), 1015 "STORED": lambda self: self._parse_stored(), 1016 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1017 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1018 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1019 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1020 "TO": lambda self: self._parse_to_table(), 1021 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1022 "TRANSFORM": lambda self: self.expression( 1023 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1024 ), 1025 "TTL": lambda self: self._parse_ttl(), 1026 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1027 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1028 "VOLATILE": lambda self: self._parse_volatile_property(), 1029 "WITH": lambda self: self._parse_with_property(), 1030 } 1031 1032 CONSTRAINT_PARSERS = { 1033 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1034 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1035 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1036 "CHARACTER SET": lambda self: self.expression( 1037 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1038 ), 1039 "CHECK": lambda self: self.expression( 1040 exp.CheckColumnConstraint, 1041 this=self._parse_wrapped(self._parse_assignment), 1042 enforced=self._match_text_seq("ENFORCED"), 1043 ), 1044 "COLLATE": lambda self: self.expression( 1045 exp.CollateColumnConstraint, 1046 this=self._parse_identifier() or self._parse_column(), 1047 ), 1048 "COMMENT": lambda self: self.expression( 1049 
exp.CommentColumnConstraint, this=self._parse_string() 1050 ), 1051 "COMPRESS": lambda self: self._parse_compress(), 1052 "CLUSTERED": lambda self: self.expression( 1053 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1054 ), 1055 "NONCLUSTERED": lambda self: self.expression( 1056 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1057 ), 1058 "DEFAULT": lambda self: self.expression( 1059 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1060 ), 1061 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1062 "EPHEMERAL": lambda self: self.expression( 1063 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1064 ), 1065 "EXCLUDE": lambda self: self.expression( 1066 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1067 ), 1068 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1069 "FORMAT": lambda self: self.expression( 1070 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1071 ), 1072 "GENERATED": lambda self: self._parse_generated_as_identity(), 1073 "IDENTITY": lambda self: self._parse_auto_increment(), 1074 "INLINE": lambda self: self._parse_inline(), 1075 "LIKE": lambda self: self._parse_create_like(), 1076 "NOT": lambda self: self._parse_not_constraint(), 1077 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1078 "ON": lambda self: ( 1079 self._match(TokenType.UPDATE) 1080 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1081 ) 1082 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1083 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1084 "PERIOD": lambda self: self._parse_period_for_system_time(), 1085 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1086 "REFERENCES": lambda self: self._parse_references(match=False), 1087 "TITLE": lambda self: self.expression( 1088 
exp.TitleColumnConstraint, this=self._parse_var_or_string() 1089 ), 1090 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1091 "UNIQUE": lambda self: self._parse_unique(), 1092 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1093 "WATERMARK": lambda self: self.expression( 1094 exp.WatermarkColumnConstraint, 1095 this=self._match(TokenType.FOR) and self._parse_column(), 1096 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1097 ), 1098 "WITH": lambda self: self.expression( 1099 exp.Properties, expressions=self._parse_wrapped_properties() 1100 ), 1101 } 1102 1103 ALTER_PARSERS = { 1104 "ADD": lambda self: self._parse_alter_table_add(), 1105 "AS": lambda self: self._parse_select(), 1106 "ALTER": lambda self: self._parse_alter_table_alter(), 1107 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1108 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1109 "DROP": lambda self: self._parse_alter_table_drop(), 1110 "RENAME": lambda self: self._parse_alter_table_rename(), 1111 "SET": lambda self: self._parse_alter_table_set(), 1112 "SWAP": lambda self: self.expression( 1113 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1114 ), 1115 } 1116 1117 ALTER_ALTER_PARSERS = { 1118 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1119 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1120 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1121 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1122 } 1123 1124 SCHEMA_UNNAMED_CONSTRAINTS = { 1125 "CHECK", 1126 "EXCLUDE", 1127 "FOREIGN KEY", 1128 "LIKE", 1129 "PERIOD", 1130 "PRIMARY KEY", 1131 "UNIQUE", 1132 "WATERMARK", 1133 } 1134 1135 NO_PAREN_FUNCTION_PARSERS = { 1136 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1137 "CASE": lambda self: self._parse_case(), 1138 "CONNECT_BY_ROOT": lambda self: 
self.expression( 1139 exp.ConnectByRoot, this=self._parse_column() 1140 ), 1141 "IF": lambda self: self._parse_if(), 1142 } 1143 1144 INVALID_FUNC_NAME_TOKENS = { 1145 TokenType.IDENTIFIER, 1146 TokenType.STRING, 1147 } 1148 1149 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1150 1151 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1152 1153 FUNCTION_PARSERS = { 1154 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1155 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1156 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1157 "DECODE": lambda self: self._parse_decode(), 1158 "EXTRACT": lambda self: self._parse_extract(), 1159 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1160 "GAP_FILL": lambda self: self._parse_gap_fill(), 1161 "JSON_OBJECT": lambda self: self._parse_json_object(), 1162 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1163 "JSON_TABLE": lambda self: self._parse_json_table(), 1164 "MATCH": lambda self: self._parse_match_against(), 1165 "NORMALIZE": lambda self: self._parse_normalize(), 1166 "OPENJSON": lambda self: self._parse_open_json(), 1167 "OVERLAY": lambda self: self._parse_overlay(), 1168 "POSITION": lambda self: self._parse_position(), 1169 "PREDICT": lambda self: self._parse_predict(), 1170 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1171 "STRING_AGG": lambda self: self._parse_string_agg(), 1172 "SUBSTRING": lambda self: self._parse_substring(), 1173 "TRIM": lambda self: self._parse_trim(), 1174 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1175 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1176 "XMLELEMENT": lambda self: self.expression( 1177 exp.XMLElement, 1178 this=self._match_text_seq("NAME") and self._parse_id_var(), 1179 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1180 ), 1181 "XMLTABLE": lambda self: self._parse_xml_table(), 1182 } 1183 1184 QUERY_MODIFIER_PARSERS 
= { 1185 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1186 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1187 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1188 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1189 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1190 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1191 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1192 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1193 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1194 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1195 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1196 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1197 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1198 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1199 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1200 TokenType.CLUSTER_BY: lambda self: ( 1201 "cluster", 1202 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1203 ), 1204 TokenType.DISTRIBUTE_BY: lambda self: ( 1205 "distribute", 1206 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1207 ), 1208 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1209 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1210 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1211 } 1212 1213 SET_PARSERS = { 1214 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1215 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1216 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1217 "TRANSACTION": lambda self: self._parse_set_transaction(), 1218 } 1219 1220 SHOW_PARSERS: t.Dict[str, 
t.Callable] = {} 1221 1222 TYPE_LITERAL_PARSERS = { 1223 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1224 } 1225 1226 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1227 1228 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1229 1230 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1231 1232 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1233 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1234 "ISOLATION": ( 1235 ("LEVEL", "REPEATABLE", "READ"), 1236 ("LEVEL", "READ", "COMMITTED"), 1237 ("LEVEL", "READ", "UNCOMITTED"), 1238 ("LEVEL", "SERIALIZABLE"), 1239 ), 1240 "READ": ("WRITE", "ONLY"), 1241 } 1242 1243 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1244 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1245 ) 1246 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1247 1248 CREATE_SEQUENCE: OPTIONS_TYPE = { 1249 "SCALE": ("EXTEND", "NOEXTEND"), 1250 "SHARD": ("EXTEND", "NOEXTEND"), 1251 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1252 **dict.fromkeys( 1253 ( 1254 "SESSION", 1255 "GLOBAL", 1256 "KEEP", 1257 "NOKEEP", 1258 "ORDER", 1259 "NOORDER", 1260 "NOCACHE", 1261 "CYCLE", 1262 "NOCYCLE", 1263 "NOMINVALUE", 1264 "NOMAXVALUE", 1265 "NOSCALE", 1266 "NOSHARD", 1267 ), 1268 tuple(), 1269 ), 1270 } 1271 1272 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1273 1274 USABLES: OPTIONS_TYPE = dict.fromkeys( 1275 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1276 ) 1277 1278 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1279 1280 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1281 "TYPE": ("EVOLUTION",), 1282 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1283 } 1284 1285 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1286 1287 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1288 
    # Option keywords that may follow a key constraint (e.g. MATCH FULL, INITIALLY DEFERRED)
    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    # Conflict-resolution keywords usable as INSERT alternatives (INSERT OR <alternative>)
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    # ROWS is excluded so a window alias can't swallow the frame-spec keyword
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    # ROW/ROWS/PERCENT belong to the FETCH grammar itself, so they can't start its operand
    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # ODBC datetime escape prefixes mapped to the expression each one produces
    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    # Dispatch table for the clause that follows ANALYZE <keyword> ...
    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    # Expression types onto which query modifiers (LIMIT, ORDER BY, ...) can be attached
    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    # Whether bare CAST has strict (error-on-failure) semantics in this dialect
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether single-argument LOG means natural logarithm (LN)
    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How parse errors are surfaced (defaults to ErrorLevel.IMMEDIATE).
            error_message_context: Number of characters of surrounding SQL shown in error messages.
            max_errors: Maximum number of error messages concatenated into a raised ParseError.
            dialect: The dialect (name or instance) this parser targets.
        """
        # Imported here, not at module level — presumably avoids a circular import
        # between sqlglot.parser and sqlglot.dialects (TODO confirm)
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self) -> None:
        """Clears all mutable parsing state so the instance can be reused across calls."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for an expression type.
            ParseError: If none of the given expression types could be parsed.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this failure belongs to, so the
                # aggregated error below is actionable
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream into semicolon-delimited chunks and applies
        # `parse_method` to each chunk, producing one (possibly None) tree per chunk.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A semicolon that carries comments becomes its own chunk so
                # those comments aren't dropped with the statement separator
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk wasn't fully consumed by the parse
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # \033[4m ... \033[0m underline the offending SQL span in terminals
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If no explicit comments were given, attach any comments buffered from the last token
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfers the previous token's buffered comments onto `expression`, consuming them
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # The slice of the original SQL spanned by the two tokens, inclusive of `end`
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent in the source text.
        # NOTE(review): despite the -> bool annotation, this returns a falsy non-bool
        # (None) when either token is missing; callers appear to use it only in
        # boolean context — confirm before relying on the return value directly.
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor by `times` tokens (negative values move backward) and
        # refreshes the _curr/_next/_prev/_prev_comments views of the token list
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Backtracks (or advances) the cursor to an absolute token index
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        # Emits a warning when a statement is about to be parsed as an opaque Command
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback for unsupported syntax: wraps the statement in an opaque exp.Command
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any failure surfaces as an exception we can catch here
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            # Restore the cursor on failure (or unconditionally when `retreat` is set)
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON [MATERIALIZED] <kind> <target> ...; falls
        # back to an opaque Command when the target kind isn't a known creatable
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this
    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        # ClickHouse materialized view target: TO <table>.
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause: TTL actions, WHERE, GROUP BY and SET aggregates."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse one statement: a registered statement kind, a dialect command, or a bare expression/select."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            # Capture the comments before dispatching, since the sub-parser advances the cursor.
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parse a DROP statement; falls back to a generic Command for unknown targets."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the whole sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement for any creatable kind (table, view, function, index, ...)."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties can appear at several locations; accumulate them all
            # into a single exp.Properties node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            # Trailing unparsed tokens: give up and re-parse as an opaque Command.
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parse CREATE SEQUENCE options; returns None if no tokens were consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # If the index didn't move, nothing was parsed.
        return None if self._index == index else seq
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Forward only the modifier flags that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser doesn't accept these modifiers for this property.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        # Parse a parenthesized, comma-separated property list.
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property: a registered parser, a known keyword form, or a generic key = value."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key = value property; backtrack and try sequence options.
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        """Parse Hive-style STORED BY <handler> / STORED AS <format> (optionally INPUTFORMAT/OUTPUTFORMAT)."""
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )
    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        # Unquoted identifiers are normalized to variables.
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Property of the form <NAME> [= | AS] <value>.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into one exp.Properties node, or None if there are none."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse can yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Disambiguate VOLATILE: after a creatable keyword it's a table property,
        # otherwise it's a function stability specifier.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL SYSTEM_VERSIONING = ON/OFF [(HISTORY_TABLE = ..., ...)]."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse DATA_DELETION = ON/OFF [(FILTER_COLUMN = ..., RETENTION_PERIOD = ...)]."""
        self._match(TokenType.EQ)
        # ON unless OFF is explicitly matched.
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        """Parse DISTRIBUTED BY HASH(...)/RANDOM [BUCKETS n|AUTO] [ORDER BY ...]."""
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        # e.g. DUPLICATE KEY (a, b) style clauses; expr_type picks the node kind.
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Dispatch the many WITH <...> property forms to their specific parsers."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM = ON | OFF | DEFAULT
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        # CLUSTER BY <ordered exprs>, optionally parenthesized.
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse Hive CLUSTERED BY (...) [SORTED BY (...)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # Only COPY GRANTS is a property; otherwise back off the COPY token.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        # Teradata WITH [NO] [CONCURRENT] ISOLATED LOADING [...]; backtracks if
        # the mandatory ISOLATED LOADING keywords are absent.
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse Teradata LOCKING <kind> [<target>] {FOR|IN} <lock type> [OVERRIDE]."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a Postgres partition bound: IN (...), FROM (...) TO (...), or WITH (MODULUS n, REMAINDER m)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        # PARTITION OF <parent> {DEFAULT | FOR VALUES <bound spec>}; backtracks
        # the PARTITION token when OF doesn't follow.
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None
    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse CREATE TABLE ... LIKE <table> [INCLUDING/EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: RETURNS TABLE [<...>], RETURNS NULL ON NULL INPUT, or a plain type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<style>] [<kind>] <statement or table> [properties] [partition]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The matched "style" was actually the first part of a dotted table
            # name; back off both tokens and re-parse it as a table.
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse Oracle-style INSERT {FIRST | ALL} [WHEN ... THEN] INTO ... SELECT ..."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # One [WHEN <cond> THEN] [ELSE] INTO <table> [VALUES ...] branch.
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )
def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
    """Parse an INSERT statement (or dispatch to multitable INSERT FIRST/ALL)."""
    comments = []
    hint = self._parse_hint()
    overwrite = self._match(TokenType.OVERWRITE)
    ignore = self._match(TokenType.IGNORE)
    local = self._match_text_seq("LOCAL")
    alternative = None
    is_function = None

    if self._match_text_seq("DIRECTORY"):
        # Hive-style INSERT [LOCAL] DIRECTORY 'path' [ROW FORMAT ...].
        this: t.Optional[exp.Expression] = self.expression(
            exp.Directory,
            this=self._parse_var_or_string(),
            local=local,
            row_format=self._parse_row_format(match_row=True),
        )
    else:
        if self._match_set((TokenType.FIRST, TokenType.ALL)):
            # Oracle INSERT {FIRST|ALL} — handled by the multitable parser.
            comments += ensure_list(self._prev_comments)
            return self._parse_multitable_inserts(comments)

        if self._match(TokenType.OR):
            # e.g. sqlite INSERT OR {REPLACE|IGNORE|...}.
            alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

        self._match(TokenType.INTO)
        comments += ensure_list(self._prev_comments)
        self._match(TokenType.TABLE)
        is_function = self._match(TokenType.FUNCTION)

        this = (
            self._parse_table(schema=True, parse_partition=True)
            if not is_function
            else self._parse_function()
        )
        if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
            this.set("alias", self._parse_table_alias())

    returning = self._parse_returning()

    # NOTE: the keyword-argument values below consume tokens in order, so their
    # ordering mirrors the clause order of the INSERT grammar — do not reorder.
    return self.expression(
        exp.Insert,
        comments=comments,
        hint=hint,
        is_function=is_function,
        this=this,
        stored=self._match_text_seq("STORED") and self._parse_stored(),
        by_name=self._match_text_seq("BY", "NAME"),
        exists=self._parse_exists(),
        where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
        partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
        settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
        expression=self._parse_derived_table_values() or self._parse_ddl_select(),
        conflict=self._parse_on_conflict(),
        returning=returning or self._parse_returning(),
        overwrite=overwrite,
        alternative=alternative,
        ignore=ignore,
        source=self._match(TokenType.TABLE) and self._parse_table(),
    )

def _parse_kill(self) -> exp.Kill:
    """Parse a KILL statement: optional CONNECTION/QUERY kind, then the target id."""
    kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

    return self.expression(
        exp.Kill,
        this=self._parse_primary(),
        kind=kind,
    )

def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
    """Parse `ON CONFLICT ...` (Postgres/sqlite) or `ON DUPLICATE KEY ...` (MySQL)."""
    conflict = self._match_text_seq("ON", "CONFLICT")
    duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

    if not conflict and not duplicate:
        return None

    conflict_keys = None
    constraint = None

    if conflict:
        if self._match_text_seq("ON", "CONSTRAINT"):
            constraint = self._parse_id_var()
        elif self._match(TokenType.L_PAREN):
            # Conflict target column list.
            conflict_keys = self._parse_csv(self._parse_id_var)
            self._match_r_paren()

    action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
    if self._prev.token_type == TokenType.UPDATE:
        # DO UPDATE [SET] a = ..., b = ...
        self._match(TokenType.SET)
        expressions = self._parse_csv(self._parse_equality)
    else:
        expressions = None

    return self.expression(
        exp.OnConflict,
        duplicate=duplicate,
        expressions=expressions,
        action=action,
        conflict_keys=conflict_keys,
        constraint=constraint,
        where=self._parse_where(),
    )

def _parse_returning(self) -> t.Optional[exp.Returning]:
    """Parse a RETURNING clause with an optional INTO target."""
    if not self._match(TokenType.RETURNING):
        return None
    return self.expression(
        exp.Returning,
        expressions=self._parse_csv(self._parse_expression),
        into=self._match(TokenType.INTO) and self._parse_table_part(),
    )

def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
    """Parse the `FORMAT ...` tail of a ROW FORMAT clause (ROW was already consumed)."""
    if not self._match(TokenType.FORMAT):
        return None
    return self._parse_row_format()

def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
    """Parse `[WITH] SERDEPROPERTIES (...)`, retreating fully if it's absent."""
    index = self._index
    with_ = with_ or self._match_text_seq("WITH")

    if not self._match(TokenType.SERDE_PROPERTIES):
        self._retreat(index)
        return None
    return self.expression(
        exp.SerdeProperties,
        **{  # type: ignore
            "expressions": self._parse_wrapped_properties(),
            "with": with_,
        },
    )
def _parse_load(self) -> exp.LoadData | exp.Command:
    """Parse Hive's `LOAD DATA [LOCAL] INPATH ... INTO TABLE ...`; else fall back to a raw command."""
    if self._match_text_seq("DATA"):
        local = self._match_text_seq("LOCAL")
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )
    return self._parse_as_command(self._prev)

def _parse_delete(self) -> exp.Delete:
    """Parse a DELETE statement, including multi-table targets and USING."""
    # This handles MySQL's "Multiple-Table Syntax"
    # https://dev.mysql.com/doc/refman/8.0/en/delete.html
    tables = None
    if not self._match(TokenType.FROM, advance=False):
        tables = self._parse_csv(self._parse_table) or None

    returning = self._parse_returning()

    return self.expression(
        exp.Delete,
        tables=tables,
        this=self._match(TokenType.FROM) and self._parse_table(joins=True),
        using=self._match(TokenType.USING) and self._parse_table(joins=True),
        cluster=self._match(TokenType.ON) and self._parse_on_property(),
        where=self._parse_where(),
        returning=returning or self._parse_returning(),
        limit=self._parse_limit(),
    )

def _parse_update(self) -> exp.Update:
    """Parse an UPDATE statement: target, SET list, then trailing clauses."""
    this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
    expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
    returning = self._parse_returning()
    return self.expression(
        exp.Update,
        **{  # type: ignore
            "this": this,
            "expressions": expressions,
            "from": self._parse_from(joins=True),
            "where": self._parse_where(),
            "returning": returning or self._parse_returning(),
            "order": self._parse_order(),
            "limit": self._parse_limit(),
        },
    )

def _parse_use(self) -> exp.Use:
    """Parse a USE statement with an optional kind (e.g. DATABASE/SCHEMA/WAREHOUSE)."""
    return self.expression(
        exp.Use,
        kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
        this=self._parse_table(schema=False),
    )

def _parse_uncache(self) -> exp.Uncache:
    """Parse Spark's `UNCACHE TABLE [IF EXISTS] <table>`."""
    if not self._match(TokenType.TABLE):
        self.raise_error("Expecting TABLE after UNCACHE")

    return self.expression(
        exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
    )

def _parse_cache(self) -> exp.Cache:
    """Parse Spark's `CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS] <select>`."""
    lazy = self._match_text_seq("LAZY")
    self._match(TokenType.TABLE)
    table = self._parse_table(schema=True)

    options = []
    if self._match_text_seq("OPTIONS"):
        # A single 'key' = 'value' pair inside parentheses.
        self._match_l_paren()
        k = self._parse_string()
        self._match(TokenType.EQ)
        v = self._parse_string()
        options = [k, v]
        self._match_r_paren()

    self._match(TokenType.ALIAS)
    return self.expression(
        exp.Cache,
        this=table,
        lazy=lazy,
        options=options,
        expression=self._parse_select(nested=True),
    )
def _parse_value(self) -> t.Optional[exp.Tuple]:
    """Parse one row of a VALUES clause into an exp.Tuple."""

    def _parse_value_expression() -> t.Optional[exp.Expression]:
        # Some dialects allow the DEFAULT keyword in place of a value.
        if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
            return exp.var(self._prev.text.upper())
        return self._parse_expression()

    if self._match(TokenType.L_PAREN):
        expressions = self._parse_csv(_parse_value_expression)
        self._match_r_paren()
        return self.expression(exp.Tuple, expressions=expressions)

    # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
    expression = self._parse_expression()
    if expression:
        return self.expression(exp.Tuple, expressions=[expression])
    return None

def _parse_projections(self) -> t.List[exp.Expression]:
    """Parse the projection list of a SELECT."""
    return self._parse_expressions()

def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
    """Parse the contents of a parenthesized query: pivot, FROM-first, or a nested select."""
    if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
        this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
            is_unpivot=self._prev.token_type == TokenType.UNPIVOT
        )
    elif self._match(TokenType.FROM):
        from_ = self._parse_from(skip_from_token=True)
        # Support parentheses for duckdb FROM-first syntax
        select = self._parse_select()
        if select:
            select.set("from", from_)
            this = select
        else:
            this = exp.select("*").from_(t.cast(exp.From, from_))
    else:
        this = (
            self._parse_table()
            if table
            else self._parse_select(nested=True, parse_set_operation=False)
        )

        # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
        # in case a modifier (e.g. join) is following
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

    return this

def _parse_select(
    self,
    nested: bool = False,
    table: bool = False,
    parse_subquery_alias: bool = True,
    parse_set_operation: bool = True,
) -> t.Optional[exp.Expression]:
    """Parse a SELECT-like query: CTEs, SELECT, wrapped selects, VALUES, etc.

    Args:
        nested: whether this call is parsing a nested (sub)query.
        table: whether a bare table reference is acceptable here.
        parse_subquery_alias: whether to attach an alias to a parsed subquery.
        parse_set_operation: whether to fold trailing UNION/INTERSECT/EXCEPT in.
    """
    cte = self._parse_with()

    if cte:
        this = self._parse_statement()

        if not this:
            self.raise_error("Failed to parse any statement following CTE")
            return cte

        if "with" in this.arg_types:
            this.set("with", cte)
        else:
            self.raise_error(f"{this.key} does not support CTE")
            this = cte

        return this

    # duckdb supports leading with FROM x
    from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

    if self._match(TokenType.SELECT):
        comments = self._prev_comments

        hint = self._parse_hint()

        if self._next and not self._next.token_type == TokenType.DOT:
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)
        else:
            all_, distinct = None, None

        # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE.
        kind = (
            self._match(TokenType.ALIAS)
            and self._match_texts(("STRUCT", "VALUE"))
            and self._prev.text.upper()
        )

        if distinct:
            distinct = self.expression(
                exp.Distinct,
                on=self._parse_value() if self._match(TokenType.ON) else None,
            )

        if all_ and distinct:
            self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

        operation_modifiers = []
        while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
            operation_modifiers.append(exp.var(self._prev.text.upper()))

        limit = self._parse_limit(top=True)
        projections = self._parse_projections()

        this = self.expression(
            exp.Select,
            kind=kind,
            hint=hint,
            distinct=distinct,
            expressions=projections,
            limit=limit,
            operation_modifiers=operation_modifiers or None,
        )
        this.comments = comments

        into = self._parse_into()
        if into:
            this.set("into", into)

        if not from_:
            from_ = self._parse_from()

        if from_:
            this.set("from", from_)

        this = self._parse_query_modifiers(this)
    elif (table or nested) and self._match(TokenType.L_PAREN):
        this = self._parse_wrapped_select(table=table)

        # We return early here so that the UNION isn't attached to the subquery by the
        # following call to _parse_set_operations, but instead becomes the parent node
        self._match_r_paren()
        return self._parse_subquery(this, parse_alias=parse_subquery_alias)
    elif self._match(TokenType.VALUES, advance=False):
        this = self._parse_derived_table_values()
    elif from_:
        this = exp.select("*").from_(from_.this, copy=False)
    elif self._match(TokenType.SUMMARIZE):
        table = self._match(TokenType.TABLE)
        this = self._parse_select() or self._parse_string() or self._parse_table()
        return self.expression(exp.Summarize, this=this, table=table)
    elif self._match(TokenType.DESCRIBE):
        this = self._parse_describe()
    elif self._match_text_seq("STREAM"):
        this = self._parse_function()
        if this:
            this = self.expression(exp.Stream, this=this)
        else:
            # STREAM wasn't followed by a function call; treat it as an identifier.
            self._retreat(self._index - 1)
    else:
        this = None

    return self._parse_set_operations(this) if parse_set_operation else this
def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
    """Parse a WITH clause and its comma-separated CTEs."""
    if not skip_with_token and not self._match(TokenType.WITH):
        return None

    comments = self._prev_comments
    recursive = self._match(TokenType.RECURSIVE)

    last_comments = None
    expressions = []
    while True:
        cte = self._parse_cte()
        if isinstance(cte, exp.CTE):
            expressions.append(cte)
            if last_comments:
                cte.add_comments(last_comments)

        if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
            break
        else:
            # Tolerate a redundant WITH after the comma separator.
            self._match(TokenType.WITH)

        last_comments = self._prev_comments

    return self.expression(
        exp.With,
        comments=comments,
        expressions=expressions,
        recursive=recursive,
        search=self._parse_recursive_with_search(),
    )

def _parse_cte(self) -> t.Optional[exp.CTE]:
    """Parse one CTE: alias [NOT MATERIALIZED|MATERIALIZED] AS (statement)."""
    index = self._index

    alias = self._parse_table_alias(self.ID_VAR_TOKENS)
    if not alias or not alias.this:
        self.raise_error("Expected CTE to have alias")

    if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
        # Missing AS and the dialect requires it — this wasn't a CTE after all.
        self._retreat(index)
        return None

    comments = self._prev_comments

    if self._match_text_seq("NOT", "MATERIALIZED"):
        materialized = False
    elif self._match_text_seq("MATERIALIZED"):
        materialized = True
    else:
        materialized = None

    cte = self.expression(
        exp.CTE,
        this=self._parse_wrapped(self._parse_statement),
        alias=alias,
        materialized=materialized,
        comments=comments,
    )

    if isinstance(cte.this, exp.Values):
        # Normalize a bare VALUES CTE body into SELECT * FROM (VALUES ...) _values.
        cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True)))

    return cte

def _parse_table_alias(
    self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
) -> t.Optional[exp.TableAlias]:
    """Parse `[AS] alias [(col, ...)]` following a table-like expression."""
    # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
    # so this section tries to parse the clause version and if it fails, it treats the token
    # as an identifier (alias)
    if self._can_parse_limit_or_offset():
        return None

    any_token = self._match(TokenType.ALIAS)
    alias = (
        self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        or self._parse_string_as_identifier()
    )

    index = self._index
    if self._match(TokenType.L_PAREN):
        columns = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren() if columns else self._retreat(index)
    else:
        columns = None

    if not alias and not columns:
        return None

    table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

    # We bubble up comments from the Identifier to the TableAlias
    if isinstance(alias, exp.Identifier):
        table_alias.add_comments(alias.pop_comments())

    return table_alias

def _parse_subquery(
    self, this: t.Optional[exp.Expression], parse_alias: bool = True
) -> t.Optional[exp.Subquery]:
    """Wrap a parsed query in exp.Subquery, with optional pivots/alias/sample."""
    if not this:
        return None

    return self.expression(
        exp.Subquery,
        this=this,
        pivots=self._parse_pivots(),
        alias=self._parse_table_alias() if parse_alias else None,
        sample=self._parse_table_sample(),
    )
def _parse_query_modifiers(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Attach trailing query modifiers (joins, laterals, WHERE/GROUP/LIMIT, ...) to `this`."""
    if isinstance(this, self.MODIFIABLES):
        for join in self._parse_joins():
            this.append("joins", join)
        for lateral in iter(self._parse_lateral, None):
            this.append("laterals", lateral)

        while True:
            if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                key, expression = parser(self)

                if expression:
                    this.set(key, expression)
                    if key == "limit":
                        # LIMIT x OFFSET y — pull OFFSET out into its own node,
                        # moving any LIMIT BY expressions onto it.
                        offset = expression.args.pop("offset", None)

                        if offset:
                            offset = exp.Offset(expression=offset)
                            this.set("offset", offset)

                            limit_by_expressions = expression.expressions
                            expression.set("expressions", None)
                            offset.set("expressions", limit_by_expressions)
                continue
            break

    if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
        this = self._implicit_unnests_to_explicit(this)

    return this

def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
    """Consume all remaining tokens and wrap their raw SQL text in a Hint node."""
    start = self._curr
    while self._curr:
        self._advance()

    end = self._tokens[self._index - 1]
    return exp.Hint(expressions=[self._find_sql(start, end)])

def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
    """Parse a function-call-shaped hint element."""
    return self._parse_function_call()

def _parse_hint_body(self) -> t.Optional[exp.Hint]:
    """Parse a hint comment body as csv of calls/vars; fall back to raw text on failure."""
    start_index = self._index
    should_fallback_to_string = False

    hints = []
    try:
        for hint in iter(
            lambda: self._parse_csv(
                lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
            ),
            [],
        ):
            hints.extend(hint)
    except ParseError:
        should_fallback_to_string = True

    if should_fallback_to_string or self._curr:
        # Either parsing failed or tokens remain unconsumed — keep the raw text.
        self._retreat(start_index)
        return self._parse_hint_fallback_to_string()

    return self.expression(exp.Hint, expressions=hints)

def _parse_hint(self) -> t.Optional[exp.Hint]:
    """Parse an optimizer hint carried in the comment attached to a HINT token."""
    if self._match(TokenType.HINT) and self._prev_comments:
        return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

    return None

def _parse_into(self) -> t.Optional[exp.Into]:
    """Parse `INTO [TEMPORARY|UNLOGGED] [TABLE] <table>`."""
    if not self._match(TokenType.INTO):
        return None

    temp = self._match(TokenType.TEMPORARY)
    unlogged = self._match_text_seq("UNLOGGED")
    self._match(TokenType.TABLE)

    return self.expression(
        exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
    )

def _parse_from(
    self, joins: bool = False, skip_from_token: bool = False
) -> t.Optional[exp.From]:
    """Parse a FROM clause; `skip_from_token` if FROM was already consumed."""
    if not skip_from_token and not self._match(TokenType.FROM):
        return None

    return self.expression(
        exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
    )

def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
    """Parse one MEASURES item of MATCH_RECOGNIZE, with optional FINAL/RUNNING frame."""
    return self.expression(
        exp.MatchRecognizeMeasure,
        window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
        this=self._parse_expression(),
    )
def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
    """Parse a MATCH_RECOGNIZE(...) clause (partitioning, measures, pattern, define, ...)."""
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()

    measures = (
        self._parse_csv(self._parse_match_recognize_measure)
        if self._match_text_seq("MEASURES")
        else None
    )

    # ROWS PER MATCH variants are kept as raw text vars.
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        text = "ALL ROWS PER MATCH"
        if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
            text += " SHOW EMPTY MATCHES"
        elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
            text += " OMIT EMPTY MATCHES"
        elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
            text += " WITH UNMATCHED ROWS"
        rows = exp.var(text)
    else:
        rows = None

    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            text += " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            text += " TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            text += f" TO FIRST {self._advance_any().text}"  # type: ignore
        elif self._match_text_seq("TO", "LAST"):
            text += f" TO LAST {self._advance_any().text}"  # type: ignore
        after = exp.var(text)
    else:
        after = None

    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # The pattern is a regex-like token soup; scan to the balancing ")"
        # and keep the raw SQL text between the parentheses.
        paren = 1
        start = self._curr

        while self._curr and paren > 0:
            if self._curr.token_type == TokenType.L_PAREN:
                paren += 1
            if self._curr.token_type == TokenType.R_PAREN:
                paren -= 1

            end = self._prev
            self._advance()

        if paren > 0:
            self.raise_error("Expecting )", self._curr)

        pattern = exp.var(self._find_sql(start, end))
    else:
        pattern = None

    define = (
        self._parse_csv(self._parse_name_as_expression)
        if self._match_text_seq("DEFINE")
        else None
    )

    self._match_r_paren()

    return self.expression(
        exp.MatchRecognize,
        partition_by=partition,
        order=order,
        measures=measures,
        rows=rows,
        after=after,
        pattern=pattern,
        define=define,
        alias=self._parse_table_alias(),
    )
def _parse_join_parts(
    self,
) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
    """Match the optional (method, side, kind) token triple that prefixes JOIN."""
    return (
        self._match_set(self.JOIN_METHODS) and self._prev,
        self._match_set(self.JOIN_SIDES) and self._prev,
        self._match_set(self.JOIN_KINDS) and self._prev,
    )

def _parse_using_identifiers(self) -> t.List[exp.Expression]:
    """Parse the identifier list of a USING clause, unwrapping bare columns."""

    def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
        this = self._parse_column()
        if isinstance(this, exp.Column):
            return this.this
        return this

    return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

def _parse_join(
    self, skip_join_token: bool = False, parse_bracket: bool = False
) -> t.Optional[exp.Join]:
    """Parse one join: comma joins, [method/side/kind] JOIN, or OUTER/CROSS APPLY."""
    if self._match(TokenType.COMMA):
        # Implicit (comma) join; only valid if a table actually follows.
        table = self._try_parse(self._parse_table)
        if table:
            return self.expression(exp.Join, this=table)
        return None

    index = self._index
    method, side, kind = self._parse_join_parts()
    hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
    join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

    if not skip_join_token and not join:
        # The words we consumed weren't a join prefix after all.
        self._retreat(index)
        kind = None
        method = None
        side = None

    outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
    cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

    if not skip_join_token and not join and not outer_apply and not cross_apply:
        return None

    kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
    if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
        kwargs["expressions"] = self._parse_csv(
            lambda: self._parse_table(parse_bracket=parse_bracket)
        )

    if method:
        kwargs["method"] = method.text
    if side:
        kwargs["side"] = side.text
    if kind:
        kwargs["kind"] = kind.text
    if hint:
        kwargs["hint"] = hint

    if self._match(TokenType.MATCH_CONDITION):
        kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

    if self._match(TokenType.ON):
        kwargs["on"] = self._parse_assignment()
    elif self._match(TokenType.USING):
        kwargs["using"] = self._parse_using_identifiers()
    elif (
        not (outer_apply or cross_apply)
        and not isinstance(kwargs["this"], exp.Unnest)
        and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
    ):
        # The ON/USING may belong to a chain of nested joins; try parsing them
        # and only keep the result if a matching ON/USING follows.
        index = self._index
        joins: t.Optional[list] = list(self._parse_joins())

        if joins and self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif joins and self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        else:
            joins = None
            self._retreat(index)

        kwargs["this"].set("joins", joins if joins else None)

    comments = [c for token in (method, side, kind) if token for c in token.comments]
    return self.expression(exp.Join, comments=comments, **kwargs)

def _parse_opclass(self) -> t.Optional[exp.Expression]:
    """Parse an optional Postgres operator class following an index column expression."""
    this = self._parse_assignment()

    if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
        return this

    if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
        return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

    return this
self._parse_partition_by() 3706 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3707 tablespace = ( 3708 self._parse_var(any_token=True) 3709 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3710 else None 3711 ) 3712 where = self._parse_where() 3713 3714 on = self._parse_field() if self._match(TokenType.ON) else None 3715 3716 return self.expression( 3717 exp.IndexParameters, 3718 using=using, 3719 columns=columns, 3720 include=include, 3721 partition_by=partition_by, 3722 where=where, 3723 with_storage=with_storage, 3724 tablespace=tablespace, 3725 on=on, 3726 ) 3727 3728 def _parse_index( 3729 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3730 ) -> t.Optional[exp.Index]: 3731 if index or anonymous: 3732 unique = None 3733 primary = None 3734 amp = None 3735 3736 self._match(TokenType.ON) 3737 self._match(TokenType.TABLE) # hive 3738 table = self._parse_table_parts(schema=True) 3739 else: 3740 unique = self._match(TokenType.UNIQUE) 3741 primary = self._match_text_seq("PRIMARY") 3742 amp = self._match_text_seq("AMP") 3743 3744 if not self._match(TokenType.INDEX): 3745 return None 3746 3747 index = self._parse_id_var() 3748 table = None 3749 3750 params = self._parse_index_params() 3751 3752 return self.expression( 3753 exp.Index, 3754 this=index, 3755 table=table, 3756 unique=unique, 3757 primary=primary, 3758 amp=amp, 3759 params=params, 3760 ) 3761 3762 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3763 hints: t.List[exp.Expression] = [] 3764 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3765 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3766 hints.append( 3767 self.expression( 3768 exp.WithTableHint, 3769 expressions=self._parse_csv( 3770 lambda: self._parse_function() or self._parse_var(any_token=True) 3771 ), 3772 ) 3773 ) 3774 self._match_r_paren() 3775 else: 3776 # 
https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3777 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3778 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3779 3780 self._match_set((TokenType.INDEX, TokenType.KEY)) 3781 if self._match(TokenType.FOR): 3782 hint.set("target", self._advance_any() and self._prev.text.upper()) 3783 3784 hint.set("expressions", self._parse_wrapped_id_vars()) 3785 hints.append(hint) 3786 3787 return hints or None 3788 3789 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3790 return ( 3791 (not schema and self._parse_function(optional_parens=False)) 3792 or self._parse_id_var(any_token=False) 3793 or self._parse_string_as_identifier() 3794 or self._parse_placeholder() 3795 ) 3796 3797 def _parse_table_parts( 3798 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3799 ) -> exp.Table: 3800 catalog = None 3801 db = None 3802 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3803 3804 while self._match(TokenType.DOT): 3805 if catalog: 3806 # This allows nesting the table in arbitrarily many dot expressions if needed 3807 table = self.expression( 3808 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3809 ) 3810 else: 3811 catalog = db 3812 db = table 3813 # "" used for tsql FROM a..b case 3814 table = self._parse_table_part(schema=schema) or "" 3815 3816 if ( 3817 wildcard 3818 and self._is_connected() 3819 and (isinstance(table, exp.Identifier) or not table) 3820 and self._match(TokenType.STAR) 3821 ): 3822 if isinstance(table, exp.Identifier): 3823 table.args["this"] += "*" 3824 else: 3825 table = exp.Identifier(this="*") 3826 3827 # We bubble up comments from the Identifier to the Table 3828 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3829 3830 if is_db_reference: 3831 catalog = db 3832 db = table 3833 table = None 3834 3835 if not table and not is_db_reference: 3836 
self.raise_error(f"Expected table name but got {self._curr}") 3837 if not db and is_db_reference: 3838 self.raise_error(f"Expected database name but got {self._curr}") 3839 3840 table = self.expression( 3841 exp.Table, 3842 comments=comments, 3843 this=table, 3844 db=db, 3845 catalog=catalog, 3846 ) 3847 3848 changes = self._parse_changes() 3849 if changes: 3850 table.set("changes", changes) 3851 3852 at_before = self._parse_historical_data() 3853 if at_before: 3854 table.set("when", at_before) 3855 3856 pivots = self._parse_pivots() 3857 if pivots: 3858 table.set("pivots", pivots) 3859 3860 return table 3861 3862 def _parse_table( 3863 self, 3864 schema: bool = False, 3865 joins: bool = False, 3866 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3867 parse_bracket: bool = False, 3868 is_db_reference: bool = False, 3869 parse_partition: bool = False, 3870 ) -> t.Optional[exp.Expression]: 3871 lateral = self._parse_lateral() 3872 if lateral: 3873 return lateral 3874 3875 unnest = self._parse_unnest() 3876 if unnest: 3877 return unnest 3878 3879 values = self._parse_derived_table_values() 3880 if values: 3881 return values 3882 3883 subquery = self._parse_select(table=True) 3884 if subquery: 3885 if not subquery.args.get("pivots"): 3886 subquery.set("pivots", self._parse_pivots()) 3887 return subquery 3888 3889 bracket = parse_bracket and self._parse_bracket(None) 3890 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3891 3892 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3893 self._parse_table 3894 ) 3895 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3896 3897 only = self._match(TokenType.ONLY) 3898 3899 this = t.cast( 3900 exp.Expression, 3901 bracket 3902 or rows_from 3903 or self._parse_bracket( 3904 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3905 ), 3906 ) 3907 3908 if only: 3909 this.set("only", only) 3910 3911 # Postgres supports a 
wildcard (table) suffix operator, which is a no-op in this context 3912 self._match_text_seq("*") 3913 3914 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3915 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3916 this.set("partition", self._parse_partition()) 3917 3918 if schema: 3919 return self._parse_schema(this=this) 3920 3921 version = self._parse_version() 3922 3923 if version: 3924 this.set("version", version) 3925 3926 if self.dialect.ALIAS_POST_TABLESAMPLE: 3927 this.set("sample", self._parse_table_sample()) 3928 3929 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3930 if alias: 3931 this.set("alias", alias) 3932 3933 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3934 return self.expression( 3935 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3936 ) 3937 3938 this.set("hints", self._parse_table_hints()) 3939 3940 if not this.args.get("pivots"): 3941 this.set("pivots", self._parse_pivots()) 3942 3943 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3944 this.set("sample", self._parse_table_sample()) 3945 3946 if joins: 3947 for join in self._parse_joins(): 3948 this.append("joins", join) 3949 3950 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3951 this.set("ordinality", True) 3952 this.set("alias", self._parse_table_alias()) 3953 3954 return this 3955 3956 def _parse_version(self) -> t.Optional[exp.Version]: 3957 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3958 this = "TIMESTAMP" 3959 elif self._match(TokenType.VERSION_SNAPSHOT): 3960 this = "VERSION" 3961 else: 3962 return None 3963 3964 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3965 kind = self._prev.text.upper() 3966 start = self._parse_bitwise() 3967 self._match_texts(("TO", "AND")) 3968 end = self._parse_bitwise() 3969 expression: t.Optional[exp.Expression] = self.expression( 3970 exp.Tuple, expressions=[start, end] 3971 ) 3972 elif 
self._match_text_seq("CONTAINED", "IN"): 3973 kind = "CONTAINED IN" 3974 expression = self.expression( 3975 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3976 ) 3977 elif self._match(TokenType.ALL): 3978 kind = "ALL" 3979 expression = None 3980 else: 3981 self._match_text_seq("AS", "OF") 3982 kind = "AS OF" 3983 expression = self._parse_type() 3984 3985 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3986 3987 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3988 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3989 index = self._index 3990 historical_data = None 3991 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3992 this = self._prev.text.upper() 3993 kind = ( 3994 self._match(TokenType.L_PAREN) 3995 and self._match_texts(self.HISTORICAL_DATA_KIND) 3996 and self._prev.text.upper() 3997 ) 3998 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3999 4000 if expression: 4001 self._match_r_paren() 4002 historical_data = self.expression( 4003 exp.HistoricalData, this=this, kind=kind, expression=expression 4004 ) 4005 else: 4006 self._retreat(index) 4007 4008 return historical_data 4009 4010 def _parse_changes(self) -> t.Optional[exp.Changes]: 4011 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4012 return None 4013 4014 information = self._parse_var(any_token=True) 4015 self._match_r_paren() 4016 4017 return self.expression( 4018 exp.Changes, 4019 information=information, 4020 at_before=self._parse_historical_data(), 4021 end=self._parse_historical_data(), 4022 ) 4023 4024 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4025 if not self._match(TokenType.UNNEST): 4026 return None 4027 4028 expressions = self._parse_wrapped_csv(self._parse_equality) 4029 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4030 4031 alias = self._parse_table_alias() if with_alias else None 4032 4033 if alias: 4034 if 
self.dialect.UNNEST_COLUMN_ONLY: 4035 if alias.args.get("columns"): 4036 self.raise_error("Unexpected extra column alias in unnest.") 4037 4038 alias.set("columns", [alias.this]) 4039 alias.set("this", None) 4040 4041 columns = alias.args.get("columns") or [] 4042 if offset and len(expressions) < len(columns): 4043 offset = columns.pop() 4044 4045 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4046 self._match(TokenType.ALIAS) 4047 offset = self._parse_id_var( 4048 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4049 ) or exp.to_identifier("offset") 4050 4051 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4052 4053 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4054 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4055 if not is_derived and not ( 4056 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4057 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4058 ): 4059 return None 4060 4061 expressions = self._parse_csv(self._parse_value) 4062 alias = self._parse_table_alias() 4063 4064 if is_derived: 4065 self._match_r_paren() 4066 4067 return self.expression( 4068 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4069 ) 4070 4071 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4072 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4073 as_modifier and self._match_text_seq("USING", "SAMPLE") 4074 ): 4075 return None 4076 4077 bucket_numerator = None 4078 bucket_denominator = None 4079 bucket_field = None 4080 percent = None 4081 size = None 4082 seed = None 4083 4084 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4085 matched_l_paren = self._match(TokenType.L_PAREN) 4086 4087 if self.TABLESAMPLE_CSV: 4088 num = None 4089 expressions = self._parse_csv(self._parse_primary) 4090 else: 4091 expressions = None 4092 num = ( 4093 self._parse_factor() 
4094 if self._match(TokenType.NUMBER, advance=False) 4095 else self._parse_primary() or self._parse_placeholder() 4096 ) 4097 4098 if self._match_text_seq("BUCKET"): 4099 bucket_numerator = self._parse_number() 4100 self._match_text_seq("OUT", "OF") 4101 bucket_denominator = bucket_denominator = self._parse_number() 4102 self._match(TokenType.ON) 4103 bucket_field = self._parse_field() 4104 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4105 percent = num 4106 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4107 size = num 4108 else: 4109 percent = num 4110 4111 if matched_l_paren: 4112 self._match_r_paren() 4113 4114 if self._match(TokenType.L_PAREN): 4115 method = self._parse_var(upper=True) 4116 seed = self._match(TokenType.COMMA) and self._parse_number() 4117 self._match_r_paren() 4118 elif self._match_texts(("SEED", "REPEATABLE")): 4119 seed = self._parse_wrapped(self._parse_number) 4120 4121 if not method and self.DEFAULT_SAMPLING_METHOD: 4122 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4123 4124 return self.expression( 4125 exp.TableSample, 4126 expressions=expressions, 4127 method=method, 4128 bucket_numerator=bucket_numerator, 4129 bucket_denominator=bucket_denominator, 4130 bucket_field=bucket_field, 4131 percent=percent, 4132 size=size, 4133 seed=seed, 4134 ) 4135 4136 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4137 return list(iter(self._parse_pivot, None)) or None 4138 4139 def _parse_joins(self) -> t.Iterator[exp.Join]: 4140 return iter(self._parse_join, None) 4141 4142 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4143 if not self._match(TokenType.INTO): 4144 return None 4145 4146 return self.expression( 4147 exp.UnpivotColumns, 4148 this=self._match_text_seq("NAME") and self._parse_column(), 4149 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4150 ) 4151 4152 # https://duckdb.org/docs/sql/statements/pivot 4153 def 
_parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4154 def _parse_on() -> t.Optional[exp.Expression]: 4155 this = self._parse_bitwise() 4156 4157 if self._match(TokenType.IN): 4158 # PIVOT ... ON col IN (row_val1, row_val2) 4159 return self._parse_in(this) 4160 if self._match(TokenType.ALIAS, advance=False): 4161 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4162 return self._parse_alias(this) 4163 4164 return this 4165 4166 this = self._parse_table() 4167 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4168 into = self._parse_unpivot_columns() 4169 using = self._match(TokenType.USING) and self._parse_csv( 4170 lambda: self._parse_alias(self._parse_function()) 4171 ) 4172 group = self._parse_group() 4173 4174 return self.expression( 4175 exp.Pivot, 4176 this=this, 4177 expressions=expressions, 4178 using=using, 4179 group=group, 4180 unpivot=is_unpivot, 4181 into=into, 4182 ) 4183 4184 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4185 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4186 this = self._parse_select_or_expression() 4187 4188 self._match(TokenType.ALIAS) 4189 alias = self._parse_bitwise() 4190 if alias: 4191 if isinstance(alias, exp.Column) and not alias.db: 4192 alias = alias.this 4193 return self.expression(exp.PivotAlias, this=this, alias=alias) 4194 4195 return this 4196 4197 value = self._parse_column() 4198 4199 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4200 self.raise_error("Expecting IN (") 4201 4202 if self._match(TokenType.ANY): 4203 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4204 else: 4205 exprs = self._parse_csv(_parse_aliased_expression) 4206 4207 self._match_r_paren() 4208 return self.expression(exp.In, this=value, expressions=exprs) 4209 4210 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4211 index = self._index 4212 include_nulls = None 4213 4214 if self._match(TokenType.PIVOT): 4215 unpivot = False 
4216 elif self._match(TokenType.UNPIVOT): 4217 unpivot = True 4218 4219 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4220 if self._match_text_seq("INCLUDE", "NULLS"): 4221 include_nulls = True 4222 elif self._match_text_seq("EXCLUDE", "NULLS"): 4223 include_nulls = False 4224 else: 4225 return None 4226 4227 expressions = [] 4228 4229 if not self._match(TokenType.L_PAREN): 4230 self._retreat(index) 4231 return None 4232 4233 if unpivot: 4234 expressions = self._parse_csv(self._parse_column) 4235 else: 4236 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4237 4238 if not expressions: 4239 self.raise_error("Failed to parse PIVOT's aggregation list") 4240 4241 if not self._match(TokenType.FOR): 4242 self.raise_error("Expecting FOR") 4243 4244 field = self._parse_pivot_in() 4245 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4246 self._parse_bitwise 4247 ) 4248 4249 self._match_r_paren() 4250 4251 pivot = self.expression( 4252 exp.Pivot, 4253 expressions=expressions, 4254 field=field, 4255 unpivot=unpivot, 4256 include_nulls=include_nulls, 4257 default_on_null=default_on_null, 4258 ) 4259 4260 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4261 pivot.set("alias", self._parse_table_alias()) 4262 4263 if not unpivot: 4264 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4265 4266 columns: t.List[exp.Expression] = [] 4267 pivot_field_expressions = pivot.args["field"].expressions 4268 4269 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
4270 if not isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4271 for fld in pivot_field_expressions: 4272 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4273 for name in names: 4274 if self.PREFIXED_PIVOT_COLUMNS: 4275 name = f"{name}_{field_name}" if name else field_name 4276 else: 4277 name = f"{field_name}_{name}" if name else field_name 4278 4279 columns.append(exp.to_identifier(name)) 4280 4281 pivot.set("columns", columns) 4282 4283 return pivot 4284 4285 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4286 return [agg.alias for agg in aggregations] 4287 4288 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4289 if not skip_where_token and not self._match(TokenType.PREWHERE): 4290 return None 4291 4292 return self.expression( 4293 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4294 ) 4295 4296 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4297 if not skip_where_token and not self._match(TokenType.WHERE): 4298 return None 4299 4300 return self.expression( 4301 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4302 ) 4303 4304 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4305 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4306 return None 4307 4308 elements: t.Dict[str, t.Any] = defaultdict(list) 4309 4310 if self._match(TokenType.ALL): 4311 elements["all"] = True 4312 elif self._match(TokenType.DISTINCT): 4313 elements["all"] = False 4314 4315 while True: 4316 index = self._index 4317 4318 elements["expressions"].extend( 4319 self._parse_csv( 4320 lambda: None 4321 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4322 else self._parse_assignment() 4323 ) 4324 ) 4325 4326 before_with_index = self._index 4327 with_prefix = self._match(TokenType.WITH) 4328 4329 if 
self._match(TokenType.ROLLUP): 4330 elements["rollup"].append( 4331 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4332 ) 4333 elif self._match(TokenType.CUBE): 4334 elements["cube"].append( 4335 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4336 ) 4337 elif self._match(TokenType.GROUPING_SETS): 4338 elements["grouping_sets"].append( 4339 self.expression( 4340 exp.GroupingSets, 4341 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4342 ) 4343 ) 4344 elif self._match_text_seq("TOTALS"): 4345 elements["totals"] = True # type: ignore 4346 4347 if before_with_index <= self._index <= before_with_index + 1: 4348 self._retreat(before_with_index) 4349 break 4350 4351 if index == self._index: 4352 break 4353 4354 return self.expression(exp.Group, **elements) # type: ignore 4355 4356 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4357 return self.expression( 4358 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4359 ) 4360 4361 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4362 if self._match(TokenType.L_PAREN): 4363 grouping_set = self._parse_csv(self._parse_column) 4364 self._match_r_paren() 4365 return self.expression(exp.Tuple, expressions=grouping_set) 4366 4367 return self._parse_column() 4368 4369 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4370 if not skip_having_token and not self._match(TokenType.HAVING): 4371 return None 4372 return self.expression(exp.Having, this=self._parse_assignment()) 4373 4374 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4375 if not self._match(TokenType.QUALIFY): 4376 return None 4377 return self.expression(exp.Qualify, this=self._parse_assignment()) 4378 4379 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4380 if skip_start_token: 4381 start = None 4382 elif self._match(TokenType.START_WITH): 4383 start = 
self._parse_assignment() 4384 else: 4385 return None 4386 4387 self._match(TokenType.CONNECT_BY) 4388 nocycle = self._match_text_seq("NOCYCLE") 4389 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4390 exp.Prior, this=self._parse_bitwise() 4391 ) 4392 connect = self._parse_assignment() 4393 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4394 4395 if not start and self._match(TokenType.START_WITH): 4396 start = self._parse_assignment() 4397 4398 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4399 4400 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4401 this = self._parse_id_var(any_token=True) 4402 if self._match(TokenType.ALIAS): 4403 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4404 return this 4405 4406 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4407 if self._match_text_seq("INTERPOLATE"): 4408 return self._parse_wrapped_csv(self._parse_name_as_expression) 4409 return None 4410 4411 def _parse_order( 4412 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4413 ) -> t.Optional[exp.Expression]: 4414 siblings = None 4415 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4416 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4417 return this 4418 4419 siblings = True 4420 4421 return self.expression( 4422 exp.Order, 4423 this=this, 4424 expressions=self._parse_csv(self._parse_ordered), 4425 siblings=siblings, 4426 ) 4427 4428 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4429 if not self._match(token): 4430 return None 4431 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4432 4433 def _parse_ordered( 4434 self, parse_method: t.Optional[t.Callable] = None 4435 ) -> t.Optional[exp.Ordered]: 4436 this = parse_method() if parse_method else self._parse_assignment() 4437 if not this: 4438 return None 4439 4440 if this.name.upper() 
== "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4441 this = exp.var("ALL") 4442 4443 asc = self._match(TokenType.ASC) 4444 desc = self._match(TokenType.DESC) or (asc and False) 4445 4446 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4447 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4448 4449 nulls_first = is_nulls_first or False 4450 explicitly_null_ordered = is_nulls_first or is_nulls_last 4451 4452 if ( 4453 not explicitly_null_ordered 4454 and ( 4455 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4456 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4457 ) 4458 and self.dialect.NULL_ORDERING != "nulls_are_last" 4459 ): 4460 nulls_first = True 4461 4462 if self._match_text_seq("WITH", "FILL"): 4463 with_fill = self.expression( 4464 exp.WithFill, 4465 **{ # type: ignore 4466 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4467 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4468 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4469 "interpolate": self._parse_interpolate(), 4470 }, 4471 ) 4472 else: 4473 with_fill = None 4474 4475 return self.expression( 4476 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4477 ) 4478 4479 def _parse_limit_options(self) -> exp.LimitOptions: 4480 percent = self._match(TokenType.PERCENT) 4481 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4482 self._match_text_seq("ONLY") 4483 with_ties = self._match_text_seq("WITH", "TIES") 4484 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4485 4486 def _parse_limit( 4487 self, 4488 this: t.Optional[exp.Expression] = None, 4489 top: bool = False, 4490 skip_limit_token: bool = False, 4491 ) -> t.Optional[exp.Expression]: 4492 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4493 comments = self._prev_comments 4494 if top: 4495 limit_paren = self._match(TokenType.L_PAREN) 4496 expression = 
self._parse_term() if limit_paren else self._parse_number() 4497 4498 if limit_paren: 4499 self._match_r_paren() 4500 4501 limit_options = self._parse_limit_options() 4502 else: 4503 limit_options = None 4504 expression = self._parse_term() 4505 4506 if self._match(TokenType.COMMA): 4507 offset = expression 4508 expression = self._parse_term() 4509 else: 4510 offset = None 4511 4512 limit_exp = self.expression( 4513 exp.Limit, 4514 this=this, 4515 expression=expression, 4516 offset=offset, 4517 comments=comments, 4518 limit_options=limit_options, 4519 expressions=self._parse_limit_by(), 4520 ) 4521 4522 return limit_exp 4523 4524 if self._match(TokenType.FETCH): 4525 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4526 direction = self._prev.text.upper() if direction else "FIRST" 4527 4528 count = self._parse_field(tokens=self.FETCH_TOKENS) 4529 4530 return self.expression( 4531 exp.Fetch, 4532 direction=direction, 4533 count=count, 4534 limit_options=self._parse_limit_options(), 4535 ) 4536 4537 return this 4538 4539 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4540 if not self._match(TokenType.OFFSET): 4541 return this 4542 4543 count = self._parse_term() 4544 self._match_set((TokenType.ROW, TokenType.ROWS)) 4545 4546 return self.expression( 4547 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4548 ) 4549 4550 def _can_parse_limit_or_offset(self) -> bool: 4551 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4552 return False 4553 4554 index = self._index 4555 result = bool( 4556 self._try_parse(self._parse_limit, retreat=True) 4557 or self._try_parse(self._parse_offset, retreat=True) 4558 ) 4559 self._retreat(index) 4560 return result 4561 4562 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4563 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4564 4565 def _parse_locks(self) -> t.List[exp.Lock]: 4566 locks 
= [] 4567 while True: 4568 if self._match_text_seq("FOR", "UPDATE"): 4569 update = True 4570 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4571 "LOCK", "IN", "SHARE", "MODE" 4572 ): 4573 update = False 4574 else: 4575 break 4576 4577 expressions = None 4578 if self._match_text_seq("OF"): 4579 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4580 4581 wait: t.Optional[bool | exp.Expression] = None 4582 if self._match_text_seq("NOWAIT"): 4583 wait = True 4584 elif self._match_text_seq("WAIT"): 4585 wait = self._parse_primary() 4586 elif self._match_text_seq("SKIP", "LOCKED"): 4587 wait = False 4588 4589 locks.append( 4590 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4591 ) 4592 4593 return locks 4594 4595 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4596 while this and self._match_set(self.SET_OPERATIONS): 4597 token_type = self._prev.token_type 4598 4599 if token_type == TokenType.UNION: 4600 operation: t.Type[exp.SetOperation] = exp.Union 4601 elif token_type == TokenType.EXCEPT: 4602 operation = exp.Except 4603 else: 4604 operation = exp.Intersect 4605 4606 comments = self._prev.comments 4607 4608 if self._match(TokenType.DISTINCT): 4609 distinct: t.Optional[bool] = True 4610 elif self._match(TokenType.ALL): 4611 distinct = False 4612 else: 4613 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4614 if distinct is None: 4615 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4616 4617 by_name = self._match_text_seq("BY", "NAME") 4618 expression = self._parse_select(nested=True, parse_set_operation=False) 4619 4620 this = self.expression( 4621 operation, 4622 comments=comments, 4623 this=this, 4624 distinct=distinct, 4625 by_name=by_name, 4626 expression=expression, 4627 ) 4628 4629 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4630 expression = this.expression 4631 4632 if 
expression: 4633 for arg in self.SET_OP_MODIFIERS: 4634 expr = expression.args.get(arg) 4635 if expr: 4636 this.set(arg, expr.pop()) 4637 4638 return this 4639 4640 def _parse_expression(self) -> t.Optional[exp.Expression]: 4641 return self._parse_alias(self._parse_assignment()) 4642 4643 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4644 this = self._parse_disjunction() 4645 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4646 # This allows us to parse <non-identifier token> := <expr> 4647 this = exp.column( 4648 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4649 ) 4650 4651 while self._match_set(self.ASSIGNMENT): 4652 if isinstance(this, exp.Column) and len(this.parts) == 1: 4653 this = this.this 4654 4655 this = self.expression( 4656 self.ASSIGNMENT[self._prev.token_type], 4657 this=this, 4658 comments=self._prev_comments, 4659 expression=self._parse_assignment(), 4660 ) 4661 4662 return this 4663 4664 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4665 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4666 4667 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4668 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4669 4670 def _parse_equality(self) -> t.Optional[exp.Expression]: 4671 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4672 4673 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4674 return self._parse_tokens(self._parse_range, self.COMPARISON) 4675 4676 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4677 this = this or self._parse_bitwise() 4678 negate = self._match(TokenType.NOT) 4679 4680 if self._match_set(self.RANGE_PARSERS): 4681 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4682 if not expression: 4683 return this 4684 4685 this = expression 4686 elif self._match(TokenType.ISNULL): 4687 this = self.expression(exp.Is, this=this, 
expression=exp.Null()) 4688 4689 # Postgres supports ISNULL and NOTNULL for conditions. 4690 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4691 if self._match(TokenType.NOTNULL): 4692 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4693 this = self.expression(exp.Not, this=this) 4694 4695 if negate: 4696 this = self._negate_range(this) 4697 4698 if self._match(TokenType.IS): 4699 this = self._parse_is(this) 4700 4701 return this 4702 4703 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4704 if not this: 4705 return this 4706 4707 return self.expression(exp.Not, this=this) 4708 4709 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4710 index = self._index - 1 4711 negate = self._match(TokenType.NOT) 4712 4713 if self._match_text_seq("DISTINCT", "FROM"): 4714 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4715 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4716 4717 if self._match(TokenType.JSON): 4718 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4719 4720 if self._match_text_seq("WITH"): 4721 _with = True 4722 elif self._match_text_seq("WITHOUT"): 4723 _with = False 4724 else: 4725 _with = None 4726 4727 unique = self._match(TokenType.UNIQUE) 4728 self._match_text_seq("KEYS") 4729 expression: t.Optional[exp.Expression] = self.expression( 4730 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4731 ) 4732 else: 4733 expression = self._parse_primary() or self._parse_null() 4734 if not expression: 4735 self._retreat(index) 4736 return None 4737 4738 this = self.expression(exp.Is, this=this, expression=expression) 4739 return self.expression(exp.Not, this=this) if negate else this 4740 4741 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4742 unnest = self._parse_unnest(with_alias=False) 4743 if unnest: 4744 this = self.expression(exp.In, 
this=this, unnest=unnest) 4745 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4746 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4747 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4748 4749 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4750 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4751 else: 4752 this = self.expression(exp.In, this=this, expressions=expressions) 4753 4754 if matched_l_paren: 4755 self._match_r_paren(this) 4756 elif not self._match(TokenType.R_BRACKET, expression=this): 4757 self.raise_error("Expecting ]") 4758 else: 4759 this = self.expression(exp.In, this=this, field=self._parse_column()) 4760 4761 return this 4762 4763 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4764 low = self._parse_bitwise() 4765 self._match(TokenType.AND) 4766 high = self._parse_bitwise() 4767 return self.expression(exp.Between, this=this, low=low, high=high) 4768 4769 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4770 if not self._match(TokenType.ESCAPE): 4771 return this 4772 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4773 4774 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4775 index = self._index 4776 4777 if not self._match(TokenType.INTERVAL) and match_interval: 4778 return None 4779 4780 if self._match(TokenType.STRING, advance=False): 4781 this = self._parse_primary() 4782 else: 4783 this = self._parse_term() 4784 4785 if not this or ( 4786 isinstance(this, exp.Column) 4787 and not this.table 4788 and not this.this.quoted 4789 and this.name.upper() == "IS" 4790 ): 4791 self._retreat(index) 4792 return None 4793 4794 unit = self._parse_function() or ( 4795 not self._match(TokenType.ALIAS, advance=False) 4796 and self._parse_var(any_token=True, upper=True) 4797 ) 4798 4799 # Most 
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of bitwise-level binary operators.

        Starting from a term, repeatedly folds in: operators from self.BITWISE,
        `||` (as DPipe, when the dialect treats it as string concat), `??`
        (as Coalesce), and the two-token shifts `<<` / `>>`.
        """
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                # safe=True marks concat that tolerates non-string operands
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # x ?? y is parsed as COALESCE(x, y)
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
_parse_unary(self) -> t.Optional[exp.Expression]: 4913 if self._match_set(self.UNARY_PARSERS): 4914 return self.UNARY_PARSERS[self._prev.token_type](self) 4915 return self._parse_at_time_zone(self._parse_type()) 4916 4917 def _parse_type( 4918 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4919 ) -> t.Optional[exp.Expression]: 4920 interval = parse_interval and self._parse_interval() 4921 if interval: 4922 return interval 4923 4924 index = self._index 4925 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4926 4927 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4928 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4929 if isinstance(data_type, exp.Cast): 4930 # This constructor can contain ops directly after it, for instance struct unnesting: 4931 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4932 return self._parse_column_ops(data_type) 4933 4934 if data_type: 4935 index2 = self._index 4936 this = self._parse_primary() 4937 4938 if isinstance(this, exp.Literal): 4939 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4940 if parser: 4941 return parser(self, this, data_type) 4942 4943 return self.expression(exp.Cast, this=this, to=data_type) 4944 4945 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4946 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4947 # 4948 # If the index difference here is greater than 1, that means the parser itself must have 4949 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4950 # 4951 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4952 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4953 # callable in the TYPE_CONVERTERS mapping. 
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, returning an exp.DataType (or related node) or None.

        Args:
            check_func: when True, retreat if the parsed "type" is immediately
                followed by a string literal, since it may actually be a
                function call (e.g. DATE '2020-01-01' vs DATE(...) ambiguity).
            schema: when True, bracketed values after the type are kept as part
                of the type (fixed-size array) rather than retreated from.
            allow_identifiers: when True, a plain identifier may be re-tokenized
                and interpreted as a type (or a user-defined type, if the
                dialect supports those).

        Always retreats to the starting index and returns None on failure.
        """
        index = self._index

        this: t.Optional[exp.Expression] = None
        # Teradata's SYSUDTLIB.<type> prefix
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Not a known type token: optionally try to treat an identifier as a type
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier's SQL to see if it is really a type keyword
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    # Pretend the identifier was a type token and fall through
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    # Collect a possibly dotted UDT name, e.g. schema.my_type
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        # MAP[<key_type> => <value_type>] syntax
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        # maybe_func tracks whether what we parsed could also be a function call
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    # Nullable(T) collapses to T with nullable=True
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. AggregateFunction(func, arg_types...)
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                # Plain size/precision params, e.g. DECIMAL(38, 0)
                expressions = self._parse_csv(self._parse_type_size)

            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            # VECTOR(<element_type>, <dim>): re-build the first arg as a DataType
            if type_token == TokenType.VECTOR and len(expressions) == 2:
                expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracket forms for nested types, e.g. ARRAY<INT>, STRUCT<a INT>
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional inline constructor values, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    # STRUCT<...>() with no values: the paren wasn't a constructor
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    # INTERVAL <unit> TO <unit>, e.g. INTERVAL DAY TO SECOND
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            # A type name directly followed by a string is likely a function-style
            # literal (ambiguous); bail out so the caller can parse it differently
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        # Give the dialect a final chance to canonicalize the type
        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
Without this, the list will be parsed as a type and we'll eventually crash 5235 this = self._parse_id_var() 5236 else: 5237 this = ( 5238 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5239 or self._parse_id_var() 5240 ) 5241 5242 self._match(TokenType.COLON) 5243 5244 if ( 5245 type_required 5246 and not isinstance(this, exp.DataType) 5247 and not self._match_set(self.TYPE_TOKENS, advance=False) 5248 ): 5249 self._retreat(index) 5250 return self._parse_types() 5251 5252 return self._parse_column_def(this) 5253 5254 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5255 if not self._match_text_seq("AT", "TIME", "ZONE"): 5256 return this 5257 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5258 5259 def _parse_column(self) -> t.Optional[exp.Expression]: 5260 this = self._parse_column_reference() 5261 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5262 5263 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5264 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5265 5266 return column 5267 5268 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5269 this = self._parse_field() 5270 if ( 5271 not this 5272 and self._match(TokenType.VALUES, advance=False) 5273 and self.VALUES_FOLLOWED_BY_PAREN 5274 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5275 ): 5276 this = self._parse_id_var() 5277 5278 if isinstance(this, exp.Identifier): 5279 # We bubble up comments from the Identifier to the Column 5280 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5281 5282 return this 5283 5284 def _parse_colon_as_variant_extract( 5285 self, this: t.Optional[exp.Expression] 5286 ) -> t.Optional[exp.Expression]: 5287 casts = [] 5288 json_path = [] 5289 escape = None 5290 5291 while self._match(TokenType.COLON): 5292 start_index = self._index 5293 5294 # Snowflake allows reserved keywords as 
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing column operators (dots, casts, brackets, etc.) onto `this`.

        Repeatedly consumes tokens from self.COLUMN_OPERATORS, folding each
        parsed field into `this` — either via the operator's own builder,
        by deepening a Column's qualification chain, or as an exp.Dot.
        """
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in (TokenType.DCOLON, TokenType.DOTCOLON):
                # :: / .: introduce a cast target type
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    # The operand itself has more dots to absorb, recurse
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, (exp.Func, exp.Window)) and this:
                # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                # Re-shape the accumulated Column qualifiers into Dot nodes
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualification chain one level deeper:
                # (table, db) -> (db, catalog), with `field` becoming the column
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this
self._prev.token_type 5407 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5408 5409 if token_type == TokenType.STRING: 5410 expressions = [primary] 5411 while self._match(TokenType.STRING): 5412 expressions.append(exp.Literal.string(self._prev.text)) 5413 5414 if len(expressions) > 1: 5415 return self.expression(exp.Concat, expressions=expressions) 5416 5417 return primary 5418 5419 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5420 return exp.Literal.number(f"0.{self._prev.text}") 5421 5422 if self._match(TokenType.L_PAREN): 5423 comments = self._prev_comments 5424 query = self._parse_select() 5425 5426 if query: 5427 expressions = [query] 5428 else: 5429 expressions = self._parse_expressions() 5430 5431 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5432 5433 if not this and self._match(TokenType.R_PAREN, advance=False): 5434 this = self.expression(exp.Tuple) 5435 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5436 this = self._parse_subquery(this=this, parse_alias=False) 5437 elif isinstance(this, exp.Subquery): 5438 this = self._parse_subquery( 5439 this=self._parse_set_operations(this), parse_alias=False 5440 ) 5441 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5442 this = self.expression(exp.Tuple, expressions=expressions) 5443 else: 5444 this = self.expression(exp.Paren, this=this) 5445 5446 if this: 5447 this.add_comments(comments) 5448 5449 self._match_r_paren(expression=this) 5450 return this 5451 5452 return None 5453 5454 def _parse_field( 5455 self, 5456 any_token: bool = False, 5457 tokens: t.Optional[t.Collection[TokenType]] = None, 5458 anonymous_func: bool = False, 5459 ) -> t.Optional[exp.Expression]: 5460 if anonymous_func: 5461 field = ( 5462 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5463 or self._parse_primary() 5464 ) 5465 else: 5466 field = self._parse_primary() or self._parse_function( 5467 anonymous=anonymous_func, any_token=any_token 5468 ) 5469 return 
field or self._parse_id_var(any_token=any_token, tokens=tokens) 5470 5471 def _parse_function( 5472 self, 5473 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5474 anonymous: bool = False, 5475 optional_parens: bool = True, 5476 any_token: bool = False, 5477 ) -> t.Optional[exp.Expression]: 5478 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5479 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5480 fn_syntax = False 5481 if ( 5482 self._match(TokenType.L_BRACE, advance=False) 5483 and self._next 5484 and self._next.text.upper() == "FN" 5485 ): 5486 self._advance(2) 5487 fn_syntax = True 5488 5489 func = self._parse_function_call( 5490 functions=functions, 5491 anonymous=anonymous, 5492 optional_parens=optional_parens, 5493 any_token=any_token, 5494 ) 5495 5496 if fn_syntax: 5497 self._match(TokenType.R_BRACE) 5498 5499 return func 5500 5501 def _parse_function_call( 5502 self, 5503 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5504 anonymous: bool = False, 5505 optional_parens: bool = True, 5506 any_token: bool = False, 5507 ) -> t.Optional[exp.Expression]: 5508 if not self._curr: 5509 return None 5510 5511 comments = self._curr.comments 5512 token_type = self._curr.token_type 5513 this = self._curr.text 5514 upper = this.upper() 5515 5516 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5517 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5518 self._advance() 5519 return self._parse_window(parser(self)) 5520 5521 if not self._next or self._next.token_type != TokenType.L_PAREN: 5522 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5523 self._advance() 5524 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5525 5526 return None 5527 5528 if any_token: 5529 if token_type in self.RESERVED_TOKENS: 5530 return None 5531 elif token_type not in self.FUNC_TOKENS: 5532 return None 5533 5534 self._advance(2) 5535 5536 parser = 
self.FUNCTION_PARSERS.get(upper) 5537 if parser and not anonymous: 5538 this = parser(self) 5539 else: 5540 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5541 5542 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5543 this = self.expression( 5544 subquery_predicate, comments=comments, this=self._parse_select() 5545 ) 5546 self._match_r_paren() 5547 return this 5548 5549 if functions is None: 5550 functions = self.FUNCTIONS 5551 5552 function = functions.get(upper) 5553 known_function = function and not anonymous 5554 5555 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5556 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5557 5558 post_func_comments = self._curr and self._curr.comments 5559 if known_function and post_func_comments: 5560 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5561 # call we'll construct it as exp.Anonymous, even if it's "known" 5562 if any( 5563 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5564 for comment in post_func_comments 5565 ): 5566 known_function = False 5567 5568 if alias and known_function: 5569 args = self._kv_to_prop_eq(args) 5570 5571 if known_function: 5572 func_builder = t.cast(t.Callable, function) 5573 5574 if "dialect" in func_builder.__code__.co_varnames: 5575 func = func_builder(args, dialect=self.dialect) 5576 else: 5577 func = func_builder(args) 5578 5579 func = self.validate_expression(func, args) 5580 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5581 func.meta["name"] = this 5582 5583 this = func 5584 else: 5585 if token_type == TokenType.IDENTIFIER: 5586 this = exp.Identifier(this=this, quoted=True) 5587 this = self.expression(exp.Anonymous, this=this, expressions=args) 5588 5589 if isinstance(this, exp.Expression): 5590 this.add_comments(comments) 5591 5592 self._match_r_paren(this) 5593 return self._parse_window(this) 5594 5595 def _to_prop_eq(self, expression: exp.Expression, index: 
int) -> exp.Expression: 5596 return expression 5597 5598 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5599 transformed = [] 5600 5601 for index, e in enumerate(expressions): 5602 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5603 if isinstance(e, exp.Alias): 5604 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5605 5606 if not isinstance(e, exp.PropertyEQ): 5607 e = self.expression( 5608 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5609 ) 5610 5611 if isinstance(e.this, exp.Column): 5612 e.this.replace(e.this.this) 5613 else: 5614 e = self._to_prop_eq(e, index) 5615 5616 transformed.append(e) 5617 5618 return transformed 5619 5620 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5621 return self._parse_statement() 5622 5623 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5624 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5625 5626 def _parse_user_defined_function( 5627 self, kind: t.Optional[TokenType] = None 5628 ) -> t.Optional[exp.Expression]: 5629 this = self._parse_table_parts(schema=True) 5630 5631 if not self._match(TokenType.L_PAREN): 5632 return this 5633 5634 expressions = self._parse_csv(self._parse_function_parameter) 5635 self._match_r_paren() 5636 return self.expression( 5637 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5638 ) 5639 5640 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5641 literal = self._parse_primary() 5642 if literal: 5643 return self.expression(exp.Introducer, this=token.text, expression=literal) 5644 5645 return self.expression(exp.Identifier, this=token.text) 5646 5647 def _parse_session_parameter(self) -> exp.SessionParameter: 5648 kind = None 5649 this = self._parse_id_var() or self._parse_primary() 5650 5651 if this and self._match(TokenType.DOT): 5652 kind = this.name 5653 this 
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Try to parse a lambda (e.g. `(x, y) -> expr`); otherwise fall back.

        If no lambda arrow follows the argument list, the parser retreats and
        parses a DISTINCT list or a regular select/expression instead, then
        applies trailing modifiers (nulls handling, HAVING MAX, ORDER, LIMIT).
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            # Parenthesized argument list, e.g. (x, y)
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                # Unbalanced paren: this wasn't a lambda arg list after all
                self._retreat(index)
        else:
            # Single bare argument, e.g. x -> ...
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            # Dispatch on the lambda arrow token to build the lambda node
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda: rewind and parse as a plain expression instead
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
        # in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type, then computed-column syntax
        (ALIAS/MATERIALIZED or `AS (...)` transforms) and trailing column constraints.

        Returns `this` unchanged when neither a type nor constraints are found.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # ClickHouse-style computed column: [ALIAS | MATERIALIZED] <expr>
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            # <type> AS (<transform>) — transform column constraint
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_disjunction()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        # AUTO_INCREMENT [(start, increment) | START <n> INCREMENT <n>]
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        # GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS {ROW ... | IDENTITY (...) | (<expr>)}
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW {START | END} [HIDDEN] (system-versioned tables)
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>) — a computed expression, not an identity spec
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric form: IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        # Handles NOT NULL / NOT CASESPECIFIC / NOT FOR REPLICATION
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via `CONSTRAINT <name>`."""
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        # Don't mistake a trailing procedure option (WITH ...) for a constraint
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to a CONSTRAINT_PARSERS entry keyed by the next token's text."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key constraint options as strings: `ON <event> <action>` clauses
        plus any bare options from KEY_CONSTRAINT_OPTIONS."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        # The referenced column list is captured by _parse_table(schema=True)
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) REFERENCES ... with optional ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        # Column-level form (no parenthesized list) vs table-level PRIMARY KEY (cols)
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` following `this`: struct literals, array constructors,
        or subscript access (exp.Bracket). Recurses to handle chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Subscript access: adjust indices for the dialect's index base
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # `interval END` can be tokenized as an Interval whose unit is the END keyword
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        # Supports both IF(cond, true, false) and IF cond THEN ... [ELSE ...] END
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        # EXTRACT(part FROM expr); some dialects use a comma instead of FROM
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [DEFAULT ... ON CONVERSION ERROR]
        [FORMAT fmt]). A FORMAT with a temporal target becomes StrToDate/StrToTime."""
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        """Parse STRING_AGG / LISTAGG variants (Postgres, BigQuery, Trino) into GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        # CONVERT(expr USING charset) or CONVERT(expr, type)
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        """Parse XMLTABLE([XMLNAMESPACES(...),] 'xpath' [PASSING ...] [COLUMNS ...])."""
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: equal, or both operands are NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments: key-value list (or `*`),
        NULL handling, [WITH|WITHOUT] UNIQUE KEYS, RETURNING and ENCODING clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(doc [, path] [ON ERROR/EMPTY handling] COLUMNS (...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        # MySQL full-text search: MATCH (cols) AGAINST ('expr' [modifier])
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON,
            this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style args; supports both `POSITION(needle IN haystack)`
        and comma-separated forms, with dialect-dependent argument order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR without FROM implies the substring starts at position 1
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM([{LEADING|TRAILING|BOTH}] [chars FROM] expr [COLLATE ...])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or pattern-first dialects) the first operand is the trim chars
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # HAVING MAX|MIN <column> (Teradata-style qualifier)
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper above the aggregate
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...
6732 if alias: 6733 over = None 6734 self._match(TokenType.ALIAS) 6735 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6736 return this 6737 else: 6738 over = self._prev.text.upper() 6739 6740 if comments and isinstance(func, exp.Expression): 6741 func.pop_comments() 6742 6743 if not self._match(TokenType.L_PAREN): 6744 return self.expression( 6745 exp.Window, 6746 comments=comments, 6747 this=this, 6748 alias=self._parse_id_var(False), 6749 over=over, 6750 ) 6751 6752 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6753 6754 first = self._match(TokenType.FIRST) 6755 if self._match_text_seq("LAST"): 6756 first = False 6757 6758 partition, order = self._parse_partition_and_order() 6759 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6760 6761 if kind: 6762 self._match(TokenType.BETWEEN) 6763 start = self._parse_window_spec() 6764 self._match(TokenType.AND) 6765 end = self._parse_window_spec() 6766 6767 spec = self.expression( 6768 exp.WindowSpec, 6769 kind=kind, 6770 start=start["value"], 6771 start_side=start["side"], 6772 end=end["value"], 6773 end_side=end["side"], 6774 ) 6775 else: 6776 spec = None 6777 6778 self._match_r_paren() 6779 6780 window = self.expression( 6781 exp.Window, 6782 comments=comments, 6783 this=this, 6784 partition_by=partition, 6785 order=order, 6786 spec=spec, 6787 alias=window_alias, 6788 over=over, 6789 first=first, 6790 ) 6791 6792 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
6793 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6794 return self._parse_window(window, alias=alias) 6795 6796 return window 6797 6798 def _parse_partition_and_order( 6799 self, 6800 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6801 return self._parse_partition_by(), self._parse_order() 6802 6803 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6804 self._match(TokenType.BETWEEN) 6805 6806 return { 6807 "value": ( 6808 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6809 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6810 or self._parse_bitwise() 6811 ), 6812 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6813 } 6814 6815 def _parse_alias( 6816 self, this: t.Optional[exp.Expression], explicit: bool = False 6817 ) -> t.Optional[exp.Expression]: 6818 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6819 # so this section tries to parse the clause version and if it fails, it treats the token 6820 # as an identifier (alias) 6821 if self._can_parse_limit_or_offset(): 6822 return this 6823 6824 any_token = self._match(TokenType.ALIAS) 6825 comments = self._prev_comments or [] 6826 6827 if explicit and not any_token: 6828 return this 6829 6830 if self._match(TokenType.L_PAREN): 6831 aliases = self.expression( 6832 exp.Aliases, 6833 comments=comments, 6834 this=this, 6835 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6836 ) 6837 self._match_r_paren(aliases) 6838 return aliases 6839 6840 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6841 self.STRING_ALIASES and self._parse_string_as_identifier() 6842 ) 6843 6844 if alias: 6845 comments.extend(alias.pop_comments()) 6846 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6847 column = this.this 6848 6849 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6850 if not this.comments and 
column and column.comments: 6851 this.comments = column.pop_comments() 6852 6853 return this 6854 6855 def _parse_id_var( 6856 self, 6857 any_token: bool = True, 6858 tokens: t.Optional[t.Collection[TokenType]] = None, 6859 ) -> t.Optional[exp.Expression]: 6860 expression = self._parse_identifier() 6861 if not expression and ( 6862 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6863 ): 6864 quoted = self._prev.token_type == TokenType.STRING 6865 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6866 6867 return expression 6868 6869 def _parse_string(self) -> t.Optional[exp.Expression]: 6870 if self._match_set(self.STRING_PARSERS): 6871 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6872 return self._parse_placeholder() 6873 6874 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6875 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6876 6877 def _parse_number(self) -> t.Optional[exp.Expression]: 6878 if self._match_set(self.NUMERIC_PARSERS): 6879 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6880 return self._parse_placeholder() 6881 6882 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6883 if self._match(TokenType.IDENTIFIER): 6884 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6885 return self._parse_placeholder() 6886 6887 def _parse_var( 6888 self, 6889 any_token: bool = False, 6890 tokens: t.Optional[t.Collection[TokenType]] = None, 6891 upper: bool = False, 6892 ) -> t.Optional[exp.Expression]: 6893 if ( 6894 (any_token and self._advance_any()) 6895 or self._match(TokenType.VAR) 6896 or (self._match_set(tokens) if tokens else False) 6897 ): 6898 return self.expression( 6899 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6900 ) 6901 return self._parse_placeholder() 6902 6903 def _advance_any(self, ignore_reserved: bool = False) -> 
t.Optional[Token]: 6904 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6905 self._advance() 6906 return self._prev 6907 return None 6908 6909 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6910 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6911 6912 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6913 return self._parse_primary() or self._parse_var(any_token=True) 6914 6915 def _parse_null(self) -> t.Optional[exp.Expression]: 6916 if self._match_set(self.NULL_TOKENS): 6917 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6918 return self._parse_placeholder() 6919 6920 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6921 if self._match(TokenType.TRUE): 6922 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6923 if self._match(TokenType.FALSE): 6924 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6925 return self._parse_placeholder() 6926 6927 def _parse_star(self) -> t.Optional[exp.Expression]: 6928 if self._match(TokenType.STAR): 6929 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6930 return self._parse_placeholder() 6931 6932 def _parse_parameter(self) -> exp.Parameter: 6933 this = self._parse_identifier() or self._parse_primary_or_var() 6934 return self.expression(exp.Parameter, this=this) 6935 6936 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6937 if self._match_set(self.PLACEHOLDER_PARSERS): 6938 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6939 if placeholder: 6940 return placeholder 6941 self._advance(-1) 6942 return None 6943 6944 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6945 if not self._match_texts(keywords): 6946 return None 6947 if self._match(TokenType.L_PAREN, advance=False): 6948 return self._parse_wrapped_csv(self._parse_expression) 6949 6950 expression = self._parse_expression() 6951 
return [expression] if expression else None 6952 6953 def _parse_csv( 6954 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6955 ) -> t.List[exp.Expression]: 6956 parse_result = parse_method() 6957 items = [parse_result] if parse_result is not None else [] 6958 6959 while self._match(sep): 6960 self._add_comments(parse_result) 6961 parse_result = parse_method() 6962 if parse_result is not None: 6963 items.append(parse_result) 6964 6965 return items 6966 6967 def _parse_tokens( 6968 self, parse_method: t.Callable, expressions: t.Dict 6969 ) -> t.Optional[exp.Expression]: 6970 this = parse_method() 6971 6972 while self._match_set(expressions): 6973 this = self.expression( 6974 expressions[self._prev.token_type], 6975 this=this, 6976 comments=self._prev_comments, 6977 expression=parse_method(), 6978 ) 6979 6980 return this 6981 6982 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6983 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6984 6985 def _parse_wrapped_csv( 6986 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6987 ) -> t.List[exp.Expression]: 6988 return self._parse_wrapped( 6989 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6990 ) 6991 6992 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6993 wrapped = self._match(TokenType.L_PAREN) 6994 if not wrapped and not optional: 6995 self.raise_error("Expecting (") 6996 parse_result = parse_method() 6997 if wrapped: 6998 self._match_r_paren() 6999 return parse_result 7000 7001 def _parse_expressions(self) -> t.List[exp.Expression]: 7002 return self._parse_csv(self._parse_expression) 7003 7004 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7005 return self._parse_select() or self._parse_set_operations( 7006 self._parse_alias(self._parse_assignment(), explicit=True) 7007 if alias 7008 else 
self._parse_assignment() 7009 ) 7010 7011 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7012 return self._parse_query_modifiers( 7013 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7014 ) 7015 7016 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7017 this = None 7018 if self._match_texts(self.TRANSACTION_KIND): 7019 this = self._prev.text 7020 7021 self._match_texts(("TRANSACTION", "WORK")) 7022 7023 modes = [] 7024 while True: 7025 mode = [] 7026 while self._match(TokenType.VAR): 7027 mode.append(self._prev.text) 7028 7029 if mode: 7030 modes.append(" ".join(mode)) 7031 if not self._match(TokenType.COMMA): 7032 break 7033 7034 return self.expression(exp.Transaction, this=this, modes=modes) 7035 7036 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7037 chain = None 7038 savepoint = None 7039 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7040 7041 self._match_texts(("TRANSACTION", "WORK")) 7042 7043 if self._match_text_seq("TO"): 7044 self._match_text_seq("SAVEPOINT") 7045 savepoint = self._parse_id_var() 7046 7047 if self._match(TokenType.AND): 7048 chain = not self._match_text_seq("NO") 7049 self._match_text_seq("CHAIN") 7050 7051 if is_rollback: 7052 return self.expression(exp.Rollback, savepoint=savepoint) 7053 7054 return self.expression(exp.Commit, chain=chain) 7055 7056 def _parse_refresh(self) -> exp.Refresh: 7057 self._match(TokenType.TABLE) 7058 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7059 7060 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7061 if not self._match_text_seq("ADD"): 7062 return None 7063 7064 self._match(TokenType.COLUMN) 7065 exists_column = self._parse_exists(not_=True) 7066 expression = self._parse_field_def() 7067 7068 if expression: 7069 expression.set("exists", exists_column) 7070 7071 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 
7072 if self._match_texts(("FIRST", "AFTER")): 7073 position = self._prev.text 7074 column_position = self.expression( 7075 exp.ColumnPosition, this=self._parse_column(), position=position 7076 ) 7077 expression.set("position", column_position) 7078 7079 return expression 7080 7081 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7082 drop = self._match(TokenType.DROP) and self._parse_drop() 7083 if drop and not isinstance(drop, exp.Command): 7084 drop.set("kind", drop.args.get("kind", "COLUMN")) 7085 return drop 7086 7087 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7088 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7089 return self.expression( 7090 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7091 ) 7092 7093 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7094 index = self._index - 1 7095 7096 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7097 return self._parse_csv( 7098 lambda: self.expression( 7099 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7100 ) 7101 ) 7102 7103 self._retreat(index) 7104 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7105 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7106 7107 if self._match_text_seq("ADD", "COLUMNS"): 7108 schema = self._parse_schema() 7109 if schema: 7110 return [schema] 7111 return [] 7112 7113 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7114 7115 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7116 if self._match_texts(self.ALTER_ALTER_PARSERS): 7117 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7118 7119 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7120 # keyword after ALTER we default to parsing this statement 7121 self._match(TokenType.COLUMN) 7122 column = 
self._parse_field(any_token=True) 7123 7124 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7125 return self.expression(exp.AlterColumn, this=column, drop=True) 7126 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7127 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7128 if self._match(TokenType.COMMENT): 7129 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7130 if self._match_text_seq("DROP", "NOT", "NULL"): 7131 return self.expression( 7132 exp.AlterColumn, 7133 this=column, 7134 drop=True, 7135 allow_null=True, 7136 ) 7137 if self._match_text_seq("SET", "NOT", "NULL"): 7138 return self.expression( 7139 exp.AlterColumn, 7140 this=column, 7141 allow_null=False, 7142 ) 7143 7144 if self._match_text_seq("SET", "VISIBLE"): 7145 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7146 if self._match_text_seq("SET", "INVISIBLE"): 7147 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7148 7149 self._match_text_seq("SET", "DATA") 7150 self._match_text_seq("TYPE") 7151 return self.expression( 7152 exp.AlterColumn, 7153 this=column, 7154 dtype=self._parse_types(), 7155 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7156 using=self._match(TokenType.USING) and self._parse_assignment(), 7157 ) 7158 7159 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7160 if self._match_texts(("ALL", "EVEN", "AUTO")): 7161 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7162 7163 self._match_text_seq("KEY", "DISTKEY") 7164 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7165 7166 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7167 if compound: 7168 self._match_text_seq("SORTKEY") 7169 7170 if self._match(TokenType.L_PAREN, advance=False): 7171 return self.expression( 7172 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), 
compound=compound 7173 ) 7174 7175 self._match_texts(("AUTO", "NONE")) 7176 return self.expression( 7177 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7178 ) 7179 7180 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7181 index = self._index - 1 7182 7183 partition_exists = self._parse_exists() 7184 if self._match(TokenType.PARTITION, advance=False): 7185 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7186 7187 self._retreat(index) 7188 return self._parse_csv(self._parse_drop_column) 7189 7190 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7191 if self._match(TokenType.COLUMN): 7192 exists = self._parse_exists() 7193 old_column = self._parse_column() 7194 to = self._match_text_seq("TO") 7195 new_column = self._parse_column() 7196 7197 if old_column is None or to is None or new_column is None: 7198 return None 7199 7200 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7201 7202 self._match_text_seq("TO") 7203 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7204 7205 def _parse_alter_table_set(self) -> exp.AlterSet: 7206 alter_set = self.expression(exp.AlterSet) 7207 7208 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7209 "TABLE", "PROPERTIES" 7210 ): 7211 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7212 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7213 alter_set.set("expressions", [self._parse_assignment()]) 7214 elif self._match_texts(("LOGGED", "UNLOGGED")): 7215 alter_set.set("option", exp.var(self._prev.text.upper())) 7216 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7217 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7218 elif self._match_text_seq("LOCATION"): 7219 alter_set.set("location", self._parse_field()) 7220 elif 
self._match_text_seq("ACCESS", "METHOD"): 7221 alter_set.set("access_method", self._parse_field()) 7222 elif self._match_text_seq("TABLESPACE"): 7223 alter_set.set("tablespace", self._parse_field()) 7224 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7225 alter_set.set("file_format", [self._parse_field()]) 7226 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7227 alter_set.set("file_format", self._parse_wrapped_options()) 7228 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7229 alter_set.set("copy_options", self._parse_wrapped_options()) 7230 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7231 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7232 else: 7233 if self._match_text_seq("SERDE"): 7234 alter_set.set("serde", self._parse_field()) 7235 7236 alter_set.set("expressions", [self._parse_properties()]) 7237 7238 return alter_set 7239 7240 def _parse_alter(self) -> exp.Alter | exp.Command: 7241 start = self._prev 7242 7243 alter_token = self._match_set(self.ALTERABLES) and self._prev 7244 if not alter_token: 7245 return self._parse_as_command(start) 7246 7247 exists = self._parse_exists() 7248 only = self._match_text_seq("ONLY") 7249 this = self._parse_table(schema=True) 7250 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7251 7252 if self._next: 7253 self._advance() 7254 7255 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7256 if parser: 7257 actions = ensure_list(parser(self)) 7258 not_valid = self._match_text_seq("NOT", "VALID") 7259 options = self._parse_csv(self._parse_property) 7260 7261 if not self._curr and actions: 7262 return self.expression( 7263 exp.Alter, 7264 this=this, 7265 kind=alter_token.text.upper(), 7266 exists=exists, 7267 actions=actions, 7268 only=only, 7269 options=options, 7270 cluster=cluster, 7271 not_valid=not_valid, 7272 ) 7273 7274 return self._parse_as_command(start) 7275 7276 def _parse_analyze(self) -> 
exp.Analyze | exp.Command: 7277 start = self._prev 7278 # https://duckdb.org/docs/sql/statements/analyze 7279 if not self._curr: 7280 return self.expression(exp.Analyze) 7281 7282 options = [] 7283 while self._match_texts(self.ANALYZE_STYLES): 7284 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7285 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7286 else: 7287 options.append(self._prev.text.upper()) 7288 7289 this: t.Optional[exp.Expression] = None 7290 inner_expression: t.Optional[exp.Expression] = None 7291 7292 kind = self._curr and self._curr.text.upper() 7293 7294 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7295 this = self._parse_table_parts() 7296 elif self._match_text_seq("TABLES"): 7297 if self._match_set((TokenType.FROM, TokenType.IN)): 7298 kind = f"{kind} {self._prev.text.upper()}" 7299 this = self._parse_table(schema=True, is_db_reference=True) 7300 elif self._match_text_seq("DATABASE"): 7301 this = self._parse_table(schema=True, is_db_reference=True) 7302 elif self._match_text_seq("CLUSTER"): 7303 this = self._parse_table() 7304 # Try matching inner expr keywords before fallback to parse table. 
7305 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7306 kind = None 7307 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7308 else: 7309 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7310 kind = None 7311 this = self._parse_table_parts() 7312 7313 partition = self._try_parse(self._parse_partition) 7314 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7315 return self._parse_as_command(start) 7316 7317 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7318 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7319 "WITH", "ASYNC", "MODE" 7320 ): 7321 mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE" 7322 else: 7323 mode = None 7324 7325 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7326 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7327 7328 properties = self._parse_properties() 7329 return self.expression( 7330 exp.Analyze, 7331 kind=kind, 7332 this=this, 7333 mode=mode, 7334 partition=partition, 7335 properties=properties, 7336 expression=inner_expression, 7337 options=options, 7338 ) 7339 7340 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7341 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7342 this = None 7343 kind = self._prev.text.upper() 7344 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7345 expressions = [] 7346 7347 if not self._match_text_seq("STATISTICS"): 7348 self.raise_error("Expecting token STATISTICS") 7349 7350 if self._match_text_seq("NOSCAN"): 7351 this = "NOSCAN" 7352 elif self._match(TokenType.FOR): 7353 if self._match_text_seq("ALL", "COLUMNS"): 7354 this = "FOR ALL COLUMNS" 7355 if self._match_texts("COLUMNS"): 7356 this = "FOR COLUMNS" 7357 expressions = self._parse_csv(self._parse_column_reference) 7358 elif self._match_text_seq("SAMPLE"): 7359 sample = self._parse_number() 7360 
expressions = [ 7361 self.expression( 7362 exp.AnalyzeSample, 7363 sample=sample, 7364 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7365 ) 7366 ] 7367 7368 return self.expression( 7369 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7370 ) 7371 7372 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7373 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7374 kind = None 7375 this = None 7376 expression: t.Optional[exp.Expression] = None 7377 if self._match_text_seq("REF", "UPDATE"): 7378 kind = "REF" 7379 this = "UPDATE" 7380 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7381 this = "UPDATE SET DANGLING TO NULL" 7382 elif self._match_text_seq("STRUCTURE"): 7383 kind = "STRUCTURE" 7384 if self._match_text_seq("CASCADE", "FAST"): 7385 this = "CASCADE FAST" 7386 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7387 ("ONLINE", "OFFLINE") 7388 ): 7389 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7390 expression = self._parse_into() 7391 7392 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7393 7394 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7395 this = self._prev.text.upper() 7396 if self._match_text_seq("COLUMNS"): 7397 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7398 return None 7399 7400 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7401 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7402 if self._match_text_seq("STATISTICS"): 7403 return self.expression(exp.AnalyzeDelete, kind=kind) 7404 return None 7405 7406 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7407 if self._match_text_seq("CHAINED", "ROWS"): 7408 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7409 return None 7410 7411 # 
    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        """Parse the HISTOGRAM portion of an ANALYZE statement (MySQL / StarRocks syntax).

        The previously matched keyword (e.g. UPDATE/DROP — presumably; confirm with callers)
        becomes `this`, followed by optional column list, WITH options and update options.
        """
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    # WITH <n> BUCKETS form
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] target [alias] USING source ON cond WHEN ... [RETURNING ...]."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> exp.Whens:
        """Parse MERGE's WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND cond] THEN <action> clauses."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None if neither appears
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    # INSERT * form
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    # INSERT ROW | INSERT (cols) VALUES (...) forms
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * form
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the registered SHOW parsers, else as a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single SET item (`name = value` / `name TO value`), or None on no match.

        Args:
            kind: Optional scope qualifier already consumed (e.g. "GLOBAL"/"SESSION").
        """
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not an assignment — rewind so the caller can try something else.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            # Bare identifiers on the right-hand side are treated as plain values.
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item, dispatching to a registered SET parser when one matches."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET/UNSET statement; fall back to a raw Command if tokens remain."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Leftover tokens mean we couldn't fully parse it — rewind and wrap as Command.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-keyword) option from `options` into an exp.Var.

        Args:
            options: Map of leading keyword -> allowed keyword continuations.
            raise_unmatched: Whether to raise when no continuation matches.

        Returns:
            The matched option as a Var, or None (after rewinding) on failure.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched. An empty-but-present continuation list means the
            # bare keyword alone is valid; otherwise this is an error or a non-match.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL text in an exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # The first token's text becomes the command name; the rest is the payload.
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property `this(kind(key value ...))` into exp.DictProperty."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse `this(MIN x MAX y)` or `this(MAX y)` (MIN defaults to 0) into exp.DictRange."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `<expr> IN <iterator> [IF <cond>]` as a comprehension, or None on no match."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # NOTE(review): retreats one token *before* the saved index — presumably to
            # give back the token consumed prior to entry; confirm against callers.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string, either a single HEREDOC token or a `$tag$...$tag$` span."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            # We saw `$tag` — require the closing `$` of the opening delimiter.
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk the token stream through `trie` to find a registered sub-parser.

        Rewinds and returns None when no (possibly multi-word) key matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True (and advance by default) if the current token matches; else None."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            # Attach any pending comments to the given expression, if provided.
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Return True if the current token's type is in `types`; else None."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Return True if the next two tokens match the given pair of types; else None."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a '(' token, raising a parse error if absent."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ')' token, raising a parse error if absent."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Return True if the current non-string token's upper-cased text is in `texts`."""
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_texts(self, texts, advance=True) if False else None  # (removed)

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of keyword texts; rewind fully and return None on any miss."""
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite lambda-parameter column references inside `node`, casting typed params.

        Args:
            node: The lambda body to rewrite (may be None).
            expressions: The lambda's parameter expressions; a "to" arg marks a typed param.
        """
        if not node:
            return node

        # Map parameter name -> target type (or False when the param is untyped).
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                # Replace the outermost enclosing Dot chain; the while-else handles
                # the case where the column is not wrapped in any Dot.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [DATABASE|TABLE] ... with identity/cascade/partition options."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered expression optionally followed by `WITH <operator>`."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma/space separated list of `key = value` options."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_table_parts()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse COPY statement parameters, handling dialect-specific option shapes."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse storage/credentials clauses used by COPY (Snowflake/Redshift styles)."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        """Parse a file location in a COPY statement; dialects may override this."""
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse COPY [INTO] target FROM/TO files [credentials] [WITH] (params)."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY ... FROM, False for COPY ... TO.
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        """Parse NORMALIZE(<expr> [, <form>])."""
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        """Parse CEIL/FLOOR arguments: (this [, decimals]) [TO <unit>]."""
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        """Parse `*` and its modifiers (EXCEPT/EXCLUDE, REPLACE, RENAME, COLUMNS(...))."""
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        """Parse one GRANT privilege (multi-keyword) with an optional column list."""
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        """Parse a GRANT ... TO principal, optionally prefixed by ROLE/GROUP."""
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        """Parse GRANT privileges ON securable TO principals [WITH GRANT OPTION]."""
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        """Parse OVERLAY(<this> PLACING <expr> FROM <pos> [FOR <len>])."""
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1469 def __init__( 1470 self, 1471 error_level: t.Optional[ErrorLevel] = None, 1472 error_message_context: int = 100, 1473 max_errors: int = 3, 1474 dialect: DialectType = None, 1475 ): 1476 from sqlglot.dialects import Dialect 1477 1478 self.error_level = error_level or ErrorLevel.IMMEDIATE 1479 self.error_message_context = error_message_context 1480 self.max_errors = max_errors 1481 self.dialect = Dialect.get_or_raise(dialect) 1482 self.reset()
1494 def parse( 1495 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1496 ) -> t.List[t.Optional[exp.Expression]]: 1497 """ 1498 Parses a list of tokens and returns a list of syntax trees, one tree 1499 per parsed SQL statement. 1500 1501 Args: 1502 raw_tokens: The list of tokens. 1503 sql: The original SQL string, used to produce helpful debug messages. 1504 1505 Returns: 1506 The list of the produced syntax trees. 1507 """ 1508 return self._parse( 1509 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1510 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1512 def parse_into( 1513 self, 1514 expression_types: exp.IntoType, 1515 raw_tokens: t.List[Token], 1516 sql: t.Optional[str] = None, 1517 ) -> t.List[t.Optional[exp.Expression]]: 1518 """ 1519 Parses a list of tokens into a given Expression type. If a collection of Expression 1520 types is given instead, this method will try to parse the token list into each one 1521 of them, stopping at the first for which the parsing succeeds. 1522 1523 Args: 1524 expression_types: The expression type(s) to try and parse the token list into. 1525 raw_tokens: The list of tokens. 1526 sql: The original SQL string, used to produce helpful debug messages. 1527 1528 Returns: 1529 The target Expression. 1530 """ 1531 errors = [] 1532 for expression_type in ensure_list(expression_types): 1533 parser = self.EXPRESSION_PARSERS.get(expression_type) 1534 if not parser: 1535 raise TypeError(f"No parser registered for {expression_type}") 1536 1537 try: 1538 return self._parse(parser, raw_tokens, sql) 1539 except ParseError as e: 1540 e.errors[0]["into_expression"] = expression_type 1541 errors.append(e) 1542 1543 raise ParseError( 1544 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1545 errors=merge_errors(errors), 1546 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1586 def check_errors(self) -> None: 1587 """Logs or raises any found errors, depending on the chosen error level setting.""" 1588 if self.error_level == ErrorLevel.WARN: 1589 for error in self.errors: 1590 logger.error(str(error)) 1591 elif self.error_level == ErrorLevel.RAISE and self.errors: 1592 raise ParseError( 1593 concat_messages(self.errors, self.max_errors), 1594 errors=merge_errors(self.errors), 1595 )
Logs or raises any found errors, depending on the chosen error level setting.
1597 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1598 """ 1599 Appends an error in the list of recorded errors or raises it, depending on the chosen 1600 error level setting. 1601 """ 1602 token = token or self._curr or self._prev or Token.string("") 1603 start = token.start 1604 end = token.end + 1 1605 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1606 highlight = self.sql[start:end] 1607 end_context = self.sql[end : end + self.error_message_context] 1608 1609 error = ParseError.new( 1610 f"{message}. Line {token.line}, Col: {token.col}.\n" 1611 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1612 description=message, 1613 line=token.line, 1614 col=token.col, 1615 start_context=start_context, 1616 highlight=highlight, 1617 end_context=end_context, 1618 ) 1619 1620 if self.error_level == ErrorLevel.IMMEDIATE: 1621 raise error 1622 1623 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1625 def expression( 1626 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1627 ) -> E: 1628 """ 1629 Creates a new, validated Expression. 1630 1631 Args: 1632 exp_class: The expression class to instantiate. 1633 comments: An optional list of comments to attach to the expression. 1634 kwargs: The arguments to set for the expression along with their respective values. 1635 1636 Returns: 1637 The target expression. 1638 """ 1639 instance = exp_class(**kwargs) 1640 instance.add_comments(comments) if comments else self._add_comments(instance) 1641 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1648 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1649 """ 1650 Validates an Expression, making sure that all its mandatory arguments are set. 1651 1652 Args: 1653 expression: The expression to validate. 1654 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1655 1656 Returns: 1657 The validated expression. 1658 """ 1659 if self.error_level != ErrorLevel.IGNORE: 1660 for error_message in expression.error_messages(args): 1661 self.raise_error(error_message) 1662 1663 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.