# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor) 20 21logger = logging.getLogger("sqlglot") 22 23OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 24 25 26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 37 38 39def build_like(args: t.List) -> exp.Escape | exp.Like: 40 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 41 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 42 43 44def binary_range_parser( 45 expr_type: t.Type[exp.Expression], reverse_args: bool = False 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 def _parse_binary_range( 48 self: Parser, this: t.Optional[exp.Expression] 49 ) -> t.Optional[exp.Expression]: 50 expression = self._parse_bitwise() 51 if reverse_args: 52 this, expression = expression, this 53 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 54 55 return _parse_binary_range 56 57 58def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 59 # Default argument order is base, expression 60 
this = seq_get(args, 0) 61 expression = seq_get(args, 1) 62 63 if expression: 64 if not dialect.LOG_BASE_FIRST: 65 this, expression = expression, this 66 return exp.Log(this=this, expression=expression) 67 68 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 69 70 71def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 72 arg = seq_get(args, 0) 73 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 74 75 76def build_lower(args: t.List) -> exp.Lower | exp.Hex: 77 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 78 arg = seq_get(args, 0) 79 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 80 81 82def build_upper(args: t.List) -> exp.Upper | exp.Hex: 83 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 84 arg = seq_get(args, 0) 85 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 86 87 88def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 89 def _builder(args: t.List, dialect: Dialect) -> E: 90 expression = expr_type( 91 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 92 ) 93 if len(args) > 2 and expr_type is exp.JSONExtract: 94 expression.set("expressions", args[2:]) 95 96 return expression 97 98 return _builder 99 100 101def build_mod(args: t.List) -> exp.Mod: 102 this = seq_get(args, 0) 103 expression = seq_get(args, 1) 104 105 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 106 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 107 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 108 109 return exp.Mod(this=this, expression=expression) 110 111 112def build_pad(args: t.List, is_left: bool = True): 113 return exp.Pad( 114 this=seq_get(args, 0), 115 expression=seq_get(args, 1), 116 fill_pattern=seq_get(args, 2), 117 is_left=is_left, 118 ) 119 120 121def build_array_constructor( 122 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 123) -> exp.Expression: 124 array_exp = exp_class(expressions=args) 125 126 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 127 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 128 129 return array_exp 130 131 132def build_convert_timezone( 133 args: t.List, default_source_tz: t.Optional[str] = None 134) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 135 if len(args) == 2: 136 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 137 return exp.ConvertTimezone( 138 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 139 ) 140 141 return exp.ConvertTimezone.from_arg_list(args) 142 143 144def build_trim(args: t.List, is_left: bool = True): 145 return exp.Trim( 146 this=seq_get(args, 0), 147 expression=seq_get(args, 1), 148 position="LEADING" if is_left else "TRAILING", 149 ) 150 151 152def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce: 153 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl) 154 155 156def build_locate_strposition(args: t.List): 157 return exp.StrPosition( 158 this=seq_get(args, 1), 159 substr=seq_get(args, 0), 160 position=seq_get(args, 2), 161 ) 162 163 164class _Parser(type): 165 def __new__(cls, clsname, bases, attrs): 166 klass = super().__new__(cls, clsname, bases, attrs) 167 168 klass.SHOW_TRIE = new_trie(key.split(" 
") for key in klass.SHOW_PARSERS) 169 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 170 171 return klass 172 173 174class Parser(metaclass=_Parser): 175 """ 176 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 177 178 Args: 179 error_level: The desired error level. 180 Default: ErrorLevel.IMMEDIATE 181 error_message_context: The amount of context to capture from a query string when displaying 182 the error message (in number of characters). 183 Default: 100 184 max_errors: Maximum number of error messages to include in a raised ParseError. 185 This is only relevant if error_level is ErrorLevel.RAISE. 186 Default: 3 187 """ 188 189 FUNCTIONS: t.Dict[str, t.Callable] = { 190 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 191 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 192 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 193 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 194 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 195 ), 196 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 197 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 198 ), 199 "CHAR": lambda args: exp.Chr(expressions=args), 200 "CHR": lambda args: exp.Chr(expressions=args), 201 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 202 "CONCAT": lambda args, dialect: exp.Concat( 203 expressions=args, 204 safe=not dialect.STRICT_STRING_CONCAT, 205 coalesce=dialect.CONCAT_COALESCE, 206 ), 207 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 208 expressions=args, 209 safe=not dialect.STRICT_STRING_CONCAT, 210 coalesce=dialect.CONCAT_COALESCE, 211 ), 212 "CONVERT_TIMEZONE": build_convert_timezone, 213 "DATE_TO_DATE_STR": lambda args: exp.Cast( 214 this=seq_get(args, 0), 215 to=exp.DataType(this=exp.DataType.Type.TEXT), 216 ), 217 "GENERATE_DATE_ARRAY": lambda 
args: exp.GenerateDateArray( 218 start=seq_get(args, 0), 219 end=seq_get(args, 1), 220 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 221 ), 222 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 223 "HEX": build_hex, 224 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 225 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 226 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 227 "LIKE": build_like, 228 "LOG": build_logarithm, 229 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 230 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 231 "LOWER": build_lower, 232 "LPAD": lambda args: build_pad(args), 233 "LEFTPAD": lambda args: build_pad(args), 234 "LTRIM": lambda args: build_trim(args), 235 "MOD": build_mod, 236 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 237 "RPAD": lambda args: build_pad(args, is_left=False), 238 "RTRIM": lambda args: build_trim(args, is_left=False), 239 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 240 if len(args) != 2 241 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 242 "STRPOS": exp.StrPosition.from_arg_list, 243 "CHARINDEX": lambda args: build_locate_strposition(args), 244 "INSTR": exp.StrPosition.from_arg_list, 245 "LOCATE": lambda args: build_locate_strposition(args), 246 "TIME_TO_TIME_STR": lambda args: exp.Cast( 247 this=seq_get(args, 0), 248 to=exp.DataType(this=exp.DataType.Type.TEXT), 249 ), 250 "TO_HEX": build_hex, 251 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 252 this=exp.Cast( 253 this=seq_get(args, 0), 254 to=exp.DataType(this=exp.DataType.Type.TEXT), 255 ), 256 start=exp.Literal.number(1), 257 length=exp.Literal.number(10), 258 ), 259 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 260 "UPPER": 
build_upper, 261 "VAR_MAP": build_var_map, 262 } 263 264 NO_PAREN_FUNCTIONS = { 265 TokenType.CURRENT_DATE: exp.CurrentDate, 266 TokenType.CURRENT_DATETIME: exp.CurrentDate, 267 TokenType.CURRENT_TIME: exp.CurrentTime, 268 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 269 TokenType.CURRENT_USER: exp.CurrentUser, 270 } 271 272 STRUCT_TYPE_TOKENS = { 273 TokenType.NESTED, 274 TokenType.OBJECT, 275 TokenType.STRUCT, 276 TokenType.UNION, 277 } 278 279 NESTED_TYPE_TOKENS = { 280 TokenType.ARRAY, 281 TokenType.LIST, 282 TokenType.LOWCARDINALITY, 283 TokenType.MAP, 284 TokenType.NULLABLE, 285 TokenType.RANGE, 286 *STRUCT_TYPE_TOKENS, 287 } 288 289 ENUM_TYPE_TOKENS = { 290 TokenType.DYNAMIC, 291 TokenType.ENUM, 292 TokenType.ENUM8, 293 TokenType.ENUM16, 294 } 295 296 AGGREGATE_TYPE_TOKENS = { 297 TokenType.AGGREGATEFUNCTION, 298 TokenType.SIMPLEAGGREGATEFUNCTION, 299 } 300 301 TYPE_TOKENS = { 302 TokenType.BIT, 303 TokenType.BOOLEAN, 304 TokenType.TINYINT, 305 TokenType.UTINYINT, 306 TokenType.SMALLINT, 307 TokenType.USMALLINT, 308 TokenType.INT, 309 TokenType.UINT, 310 TokenType.BIGINT, 311 TokenType.UBIGINT, 312 TokenType.INT128, 313 TokenType.UINT128, 314 TokenType.INT256, 315 TokenType.UINT256, 316 TokenType.MEDIUMINT, 317 TokenType.UMEDIUMINT, 318 TokenType.FIXEDSTRING, 319 TokenType.FLOAT, 320 TokenType.DOUBLE, 321 TokenType.CHAR, 322 TokenType.NCHAR, 323 TokenType.VARCHAR, 324 TokenType.NVARCHAR, 325 TokenType.BPCHAR, 326 TokenType.TEXT, 327 TokenType.MEDIUMTEXT, 328 TokenType.LONGTEXT, 329 TokenType.MEDIUMBLOB, 330 TokenType.LONGBLOB, 331 TokenType.BINARY, 332 TokenType.VARBINARY, 333 TokenType.JSON, 334 TokenType.JSONB, 335 TokenType.INTERVAL, 336 TokenType.TINYBLOB, 337 TokenType.TINYTEXT, 338 TokenType.TIME, 339 TokenType.TIMETZ, 340 TokenType.TIMESTAMP, 341 TokenType.TIMESTAMP_S, 342 TokenType.TIMESTAMP_MS, 343 TokenType.TIMESTAMP_NS, 344 TokenType.TIMESTAMPTZ, 345 TokenType.TIMESTAMPLTZ, 346 TokenType.TIMESTAMPNTZ, 347 TokenType.DATETIME, 348 
TokenType.DATETIME2, 349 TokenType.DATETIME64, 350 TokenType.SMALLDATETIME, 351 TokenType.DATE, 352 TokenType.DATE32, 353 TokenType.INT4RANGE, 354 TokenType.INT4MULTIRANGE, 355 TokenType.INT8RANGE, 356 TokenType.INT8MULTIRANGE, 357 TokenType.NUMRANGE, 358 TokenType.NUMMULTIRANGE, 359 TokenType.TSRANGE, 360 TokenType.TSMULTIRANGE, 361 TokenType.TSTZRANGE, 362 TokenType.TSTZMULTIRANGE, 363 TokenType.DATERANGE, 364 TokenType.DATEMULTIRANGE, 365 TokenType.DECIMAL, 366 TokenType.DECIMAL32, 367 TokenType.DECIMAL64, 368 TokenType.DECIMAL128, 369 TokenType.DECIMAL256, 370 TokenType.UDECIMAL, 371 TokenType.BIGDECIMAL, 372 TokenType.UUID, 373 TokenType.GEOGRAPHY, 374 TokenType.GEOMETRY, 375 TokenType.POINT, 376 TokenType.RING, 377 TokenType.LINESTRING, 378 TokenType.MULTILINESTRING, 379 TokenType.POLYGON, 380 TokenType.MULTIPOLYGON, 381 TokenType.HLLSKETCH, 382 TokenType.HSTORE, 383 TokenType.PSEUDO_TYPE, 384 TokenType.SUPER, 385 TokenType.SERIAL, 386 TokenType.SMALLSERIAL, 387 TokenType.BIGSERIAL, 388 TokenType.XML, 389 TokenType.YEAR, 390 TokenType.UNIQUEIDENTIFIER, 391 TokenType.USERDEFINED, 392 TokenType.MONEY, 393 TokenType.SMALLMONEY, 394 TokenType.ROWVERSION, 395 TokenType.IMAGE, 396 TokenType.VARIANT, 397 TokenType.VECTOR, 398 TokenType.OBJECT, 399 TokenType.OBJECT_IDENTIFIER, 400 TokenType.INET, 401 TokenType.IPADDRESS, 402 TokenType.IPPREFIX, 403 TokenType.IPV4, 404 TokenType.IPV6, 405 TokenType.UNKNOWN, 406 TokenType.NULL, 407 TokenType.NAME, 408 TokenType.TDIGEST, 409 TokenType.DYNAMIC, 410 *ENUM_TYPE_TOKENS, 411 *NESTED_TYPE_TOKENS, 412 *AGGREGATE_TYPE_TOKENS, 413 } 414 415 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 416 TokenType.BIGINT: TokenType.UBIGINT, 417 TokenType.INT: TokenType.UINT, 418 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 419 TokenType.SMALLINT: TokenType.USMALLINT, 420 TokenType.TINYINT: TokenType.UTINYINT, 421 TokenType.DECIMAL: TokenType.UDECIMAL, 422 } 423 424 SUBQUERY_PREDICATES = { 425 TokenType.ANY: exp.Any, 426 TokenType.ALL: exp.All, 427 
TokenType.EXISTS: exp.Exists, 428 TokenType.SOME: exp.Any, 429 } 430 431 RESERVED_TOKENS = { 432 *Tokenizer.SINGLE_TOKENS.values(), 433 TokenType.SELECT, 434 } - {TokenType.IDENTIFIER} 435 436 DB_CREATABLES = { 437 TokenType.DATABASE, 438 TokenType.DICTIONARY, 439 TokenType.MODEL, 440 TokenType.NAMESPACE, 441 TokenType.SCHEMA, 442 TokenType.SEQUENCE, 443 TokenType.SINK, 444 TokenType.SOURCE, 445 TokenType.STORAGE_INTEGRATION, 446 TokenType.STREAMLIT, 447 TokenType.TABLE, 448 TokenType.TAG, 449 TokenType.VIEW, 450 TokenType.WAREHOUSE, 451 } 452 453 CREATABLES = { 454 TokenType.COLUMN, 455 TokenType.CONSTRAINT, 456 TokenType.FOREIGN_KEY, 457 TokenType.FUNCTION, 458 TokenType.INDEX, 459 TokenType.PROCEDURE, 460 *DB_CREATABLES, 461 } 462 463 ALTERABLES = { 464 TokenType.INDEX, 465 TokenType.TABLE, 466 TokenType.VIEW, 467 } 468 469 # Tokens that can represent identifiers 470 ID_VAR_TOKENS = { 471 TokenType.ALL, 472 TokenType.ATTACH, 473 TokenType.VAR, 474 TokenType.ANTI, 475 TokenType.APPLY, 476 TokenType.ASC, 477 TokenType.ASOF, 478 TokenType.AUTO_INCREMENT, 479 TokenType.BEGIN, 480 TokenType.BPCHAR, 481 TokenType.CACHE, 482 TokenType.CASE, 483 TokenType.COLLATE, 484 TokenType.COMMAND, 485 TokenType.COMMENT, 486 TokenType.COMMIT, 487 TokenType.CONSTRAINT, 488 TokenType.COPY, 489 TokenType.CUBE, 490 TokenType.DEFAULT, 491 TokenType.DELETE, 492 TokenType.DESC, 493 TokenType.DESCRIBE, 494 TokenType.DETACH, 495 TokenType.DICTIONARY, 496 TokenType.DIV, 497 TokenType.END, 498 TokenType.EXECUTE, 499 TokenType.ESCAPE, 500 TokenType.FALSE, 501 TokenType.FIRST, 502 TokenType.FILTER, 503 TokenType.FINAL, 504 TokenType.FORMAT, 505 TokenType.FULL, 506 TokenType.IDENTIFIER, 507 TokenType.IS, 508 TokenType.ISNULL, 509 TokenType.INTERVAL, 510 TokenType.KEEP, 511 TokenType.KILL, 512 TokenType.LEFT, 513 TokenType.LIMIT, 514 TokenType.LOAD, 515 TokenType.MERGE, 516 TokenType.NATURAL, 517 TokenType.NEXT, 518 TokenType.OFFSET, 519 TokenType.OPERATOR, 520 TokenType.ORDINALITY, 521 
TokenType.OVERLAPS, 522 TokenType.OVERWRITE, 523 TokenType.PARTITION, 524 TokenType.PERCENT, 525 TokenType.PIVOT, 526 TokenType.PRAGMA, 527 TokenType.RANGE, 528 TokenType.RECURSIVE, 529 TokenType.REFERENCES, 530 TokenType.REFRESH, 531 TokenType.RENAME, 532 TokenType.REPLACE, 533 TokenType.RIGHT, 534 TokenType.ROLLUP, 535 TokenType.ROW, 536 TokenType.ROWS, 537 TokenType.SEMI, 538 TokenType.SET, 539 TokenType.SETTINGS, 540 TokenType.SHOW, 541 TokenType.TEMPORARY, 542 TokenType.TOP, 543 TokenType.TRUE, 544 TokenType.TRUNCATE, 545 TokenType.UNIQUE, 546 TokenType.UNNEST, 547 TokenType.UNPIVOT, 548 TokenType.UPDATE, 549 TokenType.USE, 550 TokenType.VOLATILE, 551 TokenType.WINDOW, 552 *CREATABLES, 553 *SUBQUERY_PREDICATES, 554 *TYPE_TOKENS, 555 *NO_PAREN_FUNCTIONS, 556 } 557 ID_VAR_TOKENS.remove(TokenType.UNION) 558 559 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 560 561 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 562 TokenType.ANTI, 563 TokenType.APPLY, 564 TokenType.ASOF, 565 TokenType.FULL, 566 TokenType.LEFT, 567 TokenType.LOCK, 568 TokenType.NATURAL, 569 TokenType.RIGHT, 570 TokenType.SEMI, 571 TokenType.WINDOW, 572 } 573 574 ALIAS_TOKENS = ID_VAR_TOKENS 575 576 ARRAY_CONSTRUCTORS = { 577 "ARRAY": exp.Array, 578 "LIST": exp.List, 579 } 580 581 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 582 583 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 584 585 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 586 587 FUNC_TOKENS = { 588 TokenType.COLLATE, 589 TokenType.COMMAND, 590 TokenType.CURRENT_DATE, 591 TokenType.CURRENT_DATETIME, 592 TokenType.CURRENT_TIMESTAMP, 593 TokenType.CURRENT_TIME, 594 TokenType.CURRENT_USER, 595 TokenType.FILTER, 596 TokenType.FIRST, 597 TokenType.FORMAT, 598 TokenType.GLOB, 599 TokenType.IDENTIFIER, 600 TokenType.INDEX, 601 TokenType.ISNULL, 602 TokenType.ILIKE, 603 TokenType.INSERT, 604 TokenType.LIKE, 605 TokenType.MERGE, 606 TokenType.NEXT, 607 TokenType.OFFSET, 608 TokenType.PRIMARY_KEY, 609 TokenType.RANGE, 610 
TokenType.REPLACE, 611 TokenType.RLIKE, 612 TokenType.ROW, 613 TokenType.UNNEST, 614 TokenType.VAR, 615 TokenType.LEFT, 616 TokenType.RIGHT, 617 TokenType.SEQUENCE, 618 TokenType.DATE, 619 TokenType.DATETIME, 620 TokenType.TABLE, 621 TokenType.TIMESTAMP, 622 TokenType.TIMESTAMPTZ, 623 TokenType.TRUNCATE, 624 TokenType.WINDOW, 625 TokenType.XOR, 626 *TYPE_TOKENS, 627 *SUBQUERY_PREDICATES, 628 } 629 630 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 631 TokenType.AND: exp.And, 632 } 633 634 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 635 TokenType.COLON_EQ: exp.PropertyEQ, 636 } 637 638 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 639 TokenType.OR: exp.Or, 640 } 641 642 EQUALITY = { 643 TokenType.EQ: exp.EQ, 644 TokenType.NEQ: exp.NEQ, 645 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 646 } 647 648 COMPARISON = { 649 TokenType.GT: exp.GT, 650 TokenType.GTE: exp.GTE, 651 TokenType.LT: exp.LT, 652 TokenType.LTE: exp.LTE, 653 } 654 655 BITWISE = { 656 TokenType.AMP: exp.BitwiseAnd, 657 TokenType.CARET: exp.BitwiseXor, 658 TokenType.PIPE: exp.BitwiseOr, 659 } 660 661 TERM = { 662 TokenType.DASH: exp.Sub, 663 TokenType.PLUS: exp.Add, 664 TokenType.MOD: exp.Mod, 665 TokenType.COLLATE: exp.Collate, 666 } 667 668 FACTOR = { 669 TokenType.DIV: exp.IntDiv, 670 TokenType.LR_ARROW: exp.Distance, 671 TokenType.SLASH: exp.Div, 672 TokenType.STAR: exp.Mul, 673 } 674 675 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 676 677 TIMES = { 678 TokenType.TIME, 679 TokenType.TIMETZ, 680 } 681 682 TIMESTAMPS = { 683 TokenType.TIMESTAMP, 684 TokenType.TIMESTAMPTZ, 685 TokenType.TIMESTAMPLTZ, 686 *TIMES, 687 } 688 689 SET_OPERATIONS = { 690 TokenType.UNION, 691 TokenType.INTERSECT, 692 TokenType.EXCEPT, 693 } 694 695 JOIN_METHODS = { 696 TokenType.ASOF, 697 TokenType.NATURAL, 698 TokenType.POSITIONAL, 699 } 700 701 JOIN_SIDES = { 702 TokenType.LEFT, 703 TokenType.RIGHT, 704 TokenType.FULL, 705 } 706 707 JOIN_KINDS = { 708 TokenType.ANTI, 709 
TokenType.CROSS, 710 TokenType.INNER, 711 TokenType.OUTER, 712 TokenType.SEMI, 713 TokenType.STRAIGHT_JOIN, 714 } 715 716 JOIN_HINTS: t.Set[str] = set() 717 718 LAMBDAS = { 719 TokenType.ARROW: lambda self, expressions: self.expression( 720 exp.Lambda, 721 this=self._replace_lambda( 722 self._parse_assignment(), 723 expressions, 724 ), 725 expressions=expressions, 726 ), 727 TokenType.FARROW: lambda self, expressions: self.expression( 728 exp.Kwarg, 729 this=exp.var(expressions[0].name), 730 expression=self._parse_assignment(), 731 ), 732 } 733 734 COLUMN_OPERATORS = { 735 TokenType.DOT: None, 736 TokenType.DCOLON: lambda self, this, to: self.expression( 737 exp.Cast if self.STRICT_CAST else exp.TryCast, 738 this=this, 739 to=to, 740 ), 741 TokenType.ARROW: lambda self, this, path: self.expression( 742 exp.JSONExtract, 743 this=this, 744 expression=self.dialect.to_json_path(path), 745 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 746 ), 747 TokenType.DARROW: lambda self, this, path: self.expression( 748 exp.JSONExtractScalar, 749 this=this, 750 expression=self.dialect.to_json_path(path), 751 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 752 ), 753 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 754 exp.JSONBExtract, 755 this=this, 756 expression=path, 757 ), 758 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 759 exp.JSONBExtractScalar, 760 this=this, 761 expression=path, 762 ), 763 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 764 exp.JSONBContains, 765 this=this, 766 expression=key, 767 ), 768 } 769 770 EXPRESSION_PARSERS = { 771 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 772 exp.Column: lambda self: self._parse_column(), 773 exp.Condition: lambda self: self._parse_assignment(), 774 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 775 exp.Expression: lambda self: self._parse_expression(), 776 exp.From: lambda self: 
self._parse_from(joins=True), 777 exp.Group: lambda self: self._parse_group(), 778 exp.Having: lambda self: self._parse_having(), 779 exp.Hint: lambda self: self._parse_hint_body(), 780 exp.Identifier: lambda self: self._parse_id_var(), 781 exp.Join: lambda self: self._parse_join(), 782 exp.Lambda: lambda self: self._parse_lambda(), 783 exp.Lateral: lambda self: self._parse_lateral(), 784 exp.Limit: lambda self: self._parse_limit(), 785 exp.Offset: lambda self: self._parse_offset(), 786 exp.Order: lambda self: self._parse_order(), 787 exp.Ordered: lambda self: self._parse_ordered(), 788 exp.Properties: lambda self: self._parse_properties(), 789 exp.Qualify: lambda self: self._parse_qualify(), 790 exp.Returning: lambda self: self._parse_returning(), 791 exp.Select: lambda self: self._parse_select(), 792 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 793 exp.Table: lambda self: self._parse_table_parts(), 794 exp.TableAlias: lambda self: self._parse_table_alias(), 795 exp.Tuple: lambda self: self._parse_value(), 796 exp.Whens: lambda self: self._parse_when_matched(), 797 exp.Where: lambda self: self._parse_where(), 798 exp.Window: lambda self: self._parse_named_window(), 799 exp.With: lambda self: self._parse_with(), 800 "JOIN_TYPE": lambda self: self._parse_join_parts(), 801 } 802 803 STATEMENT_PARSERS = { 804 TokenType.ALTER: lambda self: self._parse_alter(), 805 TokenType.ANALYZE: lambda self: self._parse_analyze(), 806 TokenType.BEGIN: lambda self: self._parse_transaction(), 807 TokenType.CACHE: lambda self: self._parse_cache(), 808 TokenType.COMMENT: lambda self: self._parse_comment(), 809 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 810 TokenType.COPY: lambda self: self._parse_copy(), 811 TokenType.CREATE: lambda self: self._parse_create(), 812 TokenType.DELETE: lambda self: self._parse_delete(), 813 TokenType.DESC: lambda self: self._parse_describe(), 814 TokenType.DESCRIBE: lambda self: self._parse_describe(), 815 
TokenType.DROP: lambda self: self._parse_drop(), 816 TokenType.GRANT: lambda self: self._parse_grant(), 817 TokenType.INSERT: lambda self: self._parse_insert(), 818 TokenType.KILL: lambda self: self._parse_kill(), 819 TokenType.LOAD: lambda self: self._parse_load(), 820 TokenType.MERGE: lambda self: self._parse_merge(), 821 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 822 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 823 TokenType.REFRESH: lambda self: self._parse_refresh(), 824 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 825 TokenType.SET: lambda self: self._parse_set(), 826 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 827 TokenType.UNCACHE: lambda self: self._parse_uncache(), 828 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 829 TokenType.UPDATE: lambda self: self._parse_update(), 830 TokenType.USE: lambda self: self.expression( 831 exp.Use, 832 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 833 this=self._parse_table(schema=False), 834 ), 835 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 836 } 837 838 UNARY_PARSERS = { 839 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 840 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 841 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 842 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 843 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 844 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 845 } 846 847 STRING_PARSERS = { 848 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 849 exp.RawString, this=token.text 850 ), 851 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 852 exp.National, this=token.text 853 ), 854 
TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 855 TokenType.STRING: lambda self, token: self.expression( 856 exp.Literal, this=token.text, is_string=True 857 ), 858 TokenType.UNICODE_STRING: lambda self, token: self.expression( 859 exp.UnicodeString, 860 this=token.text, 861 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 862 ), 863 } 864 865 NUMERIC_PARSERS = { 866 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 867 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 868 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 869 TokenType.NUMBER: lambda self, token: self.expression( 870 exp.Literal, this=token.text, is_string=False 871 ), 872 } 873 874 PRIMARY_PARSERS = { 875 **STRING_PARSERS, 876 **NUMERIC_PARSERS, 877 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 878 TokenType.NULL: lambda self, _: self.expression(exp.Null), 879 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 880 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 881 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 882 TokenType.STAR: lambda self, _: self._parse_star_ops(), 883 } 884 885 PLACEHOLDER_PARSERS = { 886 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 887 TokenType.PARAMETER: lambda self: self._parse_parameter(), 888 TokenType.COLON: lambda self: ( 889 self.expression(exp.Placeholder, this=self._prev.text) 890 if self._match_set(self.ID_VAR_TOKENS) 891 else None 892 ), 893 } 894 895 RANGE_PARSERS = { 896 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 897 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 898 TokenType.GLOB: binary_range_parser(exp.Glob), 899 TokenType.ILIKE: binary_range_parser(exp.ILike), 900 TokenType.IN: lambda self, this: 
self._parse_in(this), 901 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 902 TokenType.IS: lambda self, this: self._parse_is(this), 903 TokenType.LIKE: binary_range_parser(exp.Like), 904 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 905 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 906 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 907 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 908 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 909 } 910 911 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 912 "ALLOWED_VALUES": lambda self: self.expression( 913 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 914 ), 915 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 916 "AUTO": lambda self: self._parse_auto_property(), 917 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 918 "BACKUP": lambda self: self.expression( 919 exp.BackupProperty, this=self._parse_var(any_token=True) 920 ), 921 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 922 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 923 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 924 "CHECKSUM": lambda self: self._parse_checksum(), 925 "CLUSTER BY": lambda self: self._parse_cluster(), 926 "CLUSTERED": lambda self: self._parse_clustered_by(), 927 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 928 exp.CollateProperty, **kwargs 929 ), 930 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 931 "CONTAINS": lambda self: self._parse_contains_property(), 932 "COPY": lambda self: self._parse_copy_property(), 933 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 934 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 935 "DEFINER": lambda self: self._parse_definer(), 936 
"DETERMINISTIC": lambda self: self.expression( 937 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 938 ), 939 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 940 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 941 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 942 "DISTKEY": lambda self: self._parse_distkey(), 943 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 944 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 945 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 946 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 947 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 948 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 949 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 950 "FREESPACE": lambda self: self._parse_freespace(), 951 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 952 "HEAP": lambda self: self.expression(exp.HeapProperty), 953 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 954 "IMMUTABLE": lambda self: self.expression( 955 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 956 ), 957 "INHERITS": lambda self: self.expression( 958 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 959 ), 960 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 961 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 962 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 963 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 964 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 965 "LIKE": lambda self: self._parse_create_like(), 966 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 967 "LOCK": lambda self: self._parse_locking(), 
968 "LOCKING": lambda self: self._parse_locking(), 969 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 970 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 971 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 972 "MODIFIES": lambda self: self._parse_modifies_property(), 973 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 974 "NO": lambda self: self._parse_no_property(), 975 "ON": lambda self: self._parse_on_property(), 976 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 977 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 978 "PARTITION": lambda self: self._parse_partitioned_of(), 979 "PARTITION BY": lambda self: self._parse_partitioned_by(), 980 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 981 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 982 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 983 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 984 "READS": lambda self: self._parse_reads_property(), 985 "REMOTE": lambda self: self._parse_remote_with_connection(), 986 "RETURNS": lambda self: self._parse_returns(), 987 "STRICT": lambda self: self.expression(exp.StrictProperty), 988 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 989 "ROW": lambda self: self._parse_row(), 990 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 991 "SAMPLE": lambda self: self.expression( 992 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 993 ), 994 "SECURE": lambda self: self.expression(exp.SecureProperty), 995 "SECURITY": lambda self: self._parse_security(), 996 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 997 "SETTINGS": lambda self: self._parse_settings_property(), 998 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 999 "SORTKEY": lambda self: 
self._parse_sortkey(), 1000 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1001 "STABLE": lambda self: self.expression( 1002 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1003 ), 1004 "STORED": lambda self: self._parse_stored(), 1005 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1006 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1007 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1008 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1009 "TO": lambda self: self._parse_to_table(), 1010 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1011 "TRANSFORM": lambda self: self.expression( 1012 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1013 ), 1014 "TTL": lambda self: self._parse_ttl(), 1015 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1016 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1017 "VOLATILE": lambda self: self._parse_volatile_property(), 1018 "WITH": lambda self: self._parse_with_property(), 1019 } 1020 1021 CONSTRAINT_PARSERS = { 1022 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1023 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1024 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1025 "CHARACTER SET": lambda self: self.expression( 1026 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1027 ), 1028 "CHECK": lambda self: self.expression( 1029 exp.CheckColumnConstraint, 1030 this=self._parse_wrapped(self._parse_assignment), 1031 enforced=self._match_text_seq("ENFORCED"), 1032 ), 1033 "COLLATE": lambda self: self.expression( 1034 exp.CollateColumnConstraint, 1035 this=self._parse_identifier() or self._parse_column(), 1036 ), 1037 "COMMENT": lambda self: self.expression( 1038 exp.CommentColumnConstraint, this=self._parse_string() 1039 ), 1040 "COMPRESS": 
lambda self: self._parse_compress(), 1041 "CLUSTERED": lambda self: self.expression( 1042 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1043 ), 1044 "NONCLUSTERED": lambda self: self.expression( 1045 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1046 ), 1047 "DEFAULT": lambda self: self.expression( 1048 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1049 ), 1050 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1051 "EPHEMERAL": lambda self: self.expression( 1052 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1053 ), 1054 "EXCLUDE": lambda self: self.expression( 1055 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1056 ), 1057 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1058 "FORMAT": lambda self: self.expression( 1059 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1060 ), 1061 "GENERATED": lambda self: self._parse_generated_as_identity(), 1062 "IDENTITY": lambda self: self._parse_auto_increment(), 1063 "INLINE": lambda self: self._parse_inline(), 1064 "LIKE": lambda self: self._parse_create_like(), 1065 "NOT": lambda self: self._parse_not_constraint(), 1066 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1067 "ON": lambda self: ( 1068 self._match(TokenType.UPDATE) 1069 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1070 ) 1071 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1072 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1073 "PERIOD": lambda self: self._parse_period_for_system_time(), 1074 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1075 "REFERENCES": lambda self: self._parse_references(match=False), 1076 "TITLE": lambda self: self.expression( 1077 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1078 ), 1079 "TTL": lambda self: 
self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # Leading keyword -> handler invoked while parsing ALTER statements.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    # Keyword -> handler for the nested ALTER ... ALTER ... forms.
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraints that may appear in a schema without an explicit name.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    # Functions that are parsed without a parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    # Tokens that cannot serve as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Function name -> dedicated parser for functions with special syntax.
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    # Token -> (modifier key, parsed value) for trailing query modifiers.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }
this, _: self.expression(exp.ParseJSON, this=this), 1213 } 1214 1215 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1216 1217 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1218 1219 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1220 1221 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1222 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1223 "ISOLATION": ( 1224 ("LEVEL", "REPEATABLE", "READ"), 1225 ("LEVEL", "READ", "COMMITTED"), 1226 ("LEVEL", "READ", "UNCOMITTED"), 1227 ("LEVEL", "SERIALIZABLE"), 1228 ), 1229 "READ": ("WRITE", "ONLY"), 1230 } 1231 1232 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1233 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1234 ) 1235 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1236 1237 CREATE_SEQUENCE: OPTIONS_TYPE = { 1238 "SCALE": ("EXTEND", "NOEXTEND"), 1239 "SHARD": ("EXTEND", "NOEXTEND"), 1240 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1241 **dict.fromkeys( 1242 ( 1243 "SESSION", 1244 "GLOBAL", 1245 "KEEP", 1246 "NOKEEP", 1247 "ORDER", 1248 "NOORDER", 1249 "NOCACHE", 1250 "CYCLE", 1251 "NOCYCLE", 1252 "NOMINVALUE", 1253 "NOMAXVALUE", 1254 "NOSCALE", 1255 "NOSHARD", 1256 ), 1257 tuple(), 1258 ), 1259 } 1260 1261 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1262 1263 USABLES: OPTIONS_TYPE = dict.fromkeys( 1264 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1265 ) 1266 1267 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1268 1269 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1270 "TYPE": ("EVOLUTION",), 1271 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1272 } 1273 1274 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1275 1276 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1277 1278 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1279 "NOT": ("ENFORCED",), 1280 "MATCH": ( 1281 
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # ODBC escape prefix -> expression built for {d ...}, {t ...}, {ts ...} literals.
    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How parse errors are surfaced; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of SQL context shown in error messages.
            max_errors: Maximum number of error messages concatenated when raising.
            dialect: The dialect (name or instance) to resolve via Dialect.get_or_raise.
        """
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all mutable parsing state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If none of the candidate types could be parsed.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream into per-statement chunks on semicolons, then
        # runs `parse_method` over each chunk.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    # Keep a semicolon that carries comments as its own chunk so
                    # the comments aren't lost.
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
1615 kwargs: The arguments to set for the expression along with their respective values. 1616 1617 Returns: 1618 The target expression. 1619 """ 1620 instance = exp_class(**kwargs) 1621 instance.add_comments(comments) if comments else self._add_comments(instance) 1622 return self.validate_expression(instance) 1623 1624 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1625 if expression and self._prev_comments: 1626 expression.add_comments(self._prev_comments) 1627 self._prev_comments = None 1628 1629 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1630 """ 1631 Validates an Expression, making sure that all its mandatory arguments are set. 1632 1633 Args: 1634 expression: The expression to validate. 1635 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1636 1637 Returns: 1638 The validated expression. 1639 """ 1640 if self.error_level != ErrorLevel.IGNORE: 1641 for error_message in expression.error_messages(args): 1642 self.raise_error(error_message) 1643 1644 return expression 1645 1646 def _find_sql(self, start: Token, end: Token) -> str: 1647 return self.sql[start.start : end.end + 1] 1648 1649 def _is_connected(self) -> bool: 1650 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1651 1652 def _advance(self, times: int = 1) -> None: 1653 self._index += times 1654 self._curr = seq_get(self._tokens, self._index) 1655 self._next = seq_get(self._tokens, self._index + 1) 1656 1657 if self._index > 0: 1658 self._prev = self._tokens[self._index - 1] 1659 self._prev_comments = self._prev.comments 1660 else: 1661 self._prev = None 1662 self._prev_comments = None 1663 1664 def _retreat(self, index: int) -> None: 1665 if index != self._index: 1666 self._advance(index - self._index) 1667 1668 def _warn_unsupported(self) -> None: 1669 if len(self._tokens) <= 1: 1670 return 1671 1672 # We use _find_sql because self.sql may comprise multiple chunks, 
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback path: wrap the statement in a generic Command node.
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so failures inside parse_method raise and can be caught here.
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: fall back to a generic Command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()
        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL entry: an expression optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            # Capture comments before the sub-parser consumes more tokens.
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        # Otherwise, try a bare expression and fall back to a SELECT.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; result of the final _match is returned.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
a statement parser 1852 start = self._prev 1853 1854 replace = ( 1855 start.token_type == TokenType.REPLACE 1856 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1857 or self._match_pair(TokenType.OR, TokenType.ALTER) 1858 ) 1859 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1860 1861 unique = self._match(TokenType.UNIQUE) 1862 1863 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1864 clustered = True 1865 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1866 "COLUMNSTORE" 1867 ): 1868 clustered = False 1869 else: 1870 clustered = None 1871 1872 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1873 self._advance() 1874 1875 properties = None 1876 create_token = self._match_set(self.CREATABLES) and self._prev 1877 1878 if not create_token: 1879 # exp.Properties.Location.POST_CREATE 1880 properties = self._parse_properties() 1881 create_token = self._match_set(self.CREATABLES) and self._prev 1882 1883 if not properties or not create_token: 1884 return self._parse_as_command(start) 1885 1886 concurrently = self._match_text_seq("CONCURRENTLY") 1887 exists = self._parse_exists(not_=True) 1888 this = None 1889 expression: t.Optional[exp.Expression] = None 1890 indexes = None 1891 no_schema_binding = None 1892 begin = None 1893 end = None 1894 clone = None 1895 1896 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1897 nonlocal properties 1898 if properties and temp_props: 1899 properties.expressions.extend(temp_props.expressions) 1900 elif temp_props: 1901 properties = temp_props 1902 1903 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1904 this = self._parse_user_defined_function(kind=create_token.token_type) 1905 1906 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1907 extend_props(self._parse_properties()) 1908 1909 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1910 
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

        # Snowflake-style CLONE / COPY clause, e.g. CREATE TABLE t CLONE other.
        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        # Leftover tokens that aren't a closing paren / comma mean we failed to
        # fully parse the statement — degrade to an opaque command.
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        # Parses CREATE SEQUENCE options (INCREMENT BY, MIN/MAXVALUE, START,
        # CACHE, OWNED BY, ...); returns None if no tokens were consumed.
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # If the token index didn't move, nothing was parsed.
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect the optional modifier keywords preceding the property name;
        # dict-literal order matters: each _match* consumes tokens in turn.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched; a TypeError
                # means this parser doesn't accept the given modifiers.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        # Parses a parenthesized, comma-separated property list.
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Parses a single property: a registered PROPERTY_PARSERS entry, a few
        # special-cased forms, or a generic `key = value` assignment.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        # Not a `key = value` pair: rewind and try sequence properties instead.
        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        # Parses STORED AS ..., including Hive's INPUTFORMAT/OUTPUTFORMAT pair.
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        # Unquoted identifiers become plain exp.Var nodes; quoted ones are kept.
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Parses `[= | AS] value` and wraps it in the given expression class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Parses consecutive properties until one fails to parse; `before=True`
        # uses the Teradata-style pre-name property grammar.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse step may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Disambiguates VOLATILE: after a PRE_VOLATILE_TOKENS token (two tokens
        # back) it's a table property, otherwise a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        # NOTE(review): if no unit token follows, the f-string renders the
        # literal text "None" — presumably valid inputs always carry a unit.
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        # Parses SYSTEM_VERSIONING = ON|OFF [(HISTORY_TABLE = ..., ...)].
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        # Parses DATA_DELETION = ON|OFF [(FILTER_COLUMN = ..., RETENTION_PERIOD = ...)].
        self._match(TokenType.EQ)
        # ON unless OFF is explicitly given (bare form defaults to on).
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        # Parses DISTRIBUTED BY HASH(...)|RANDOM [BUCKETS n|AUTO] [ORDER BY ...].
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        # Parses `... KEY (col, ...)` into the given key-property class.
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        # Dispatches the many WITH-prefixed property forms to their parsers.
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        # EXECUTE AS accepts either a known option keyword or a string literal.
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # Host is either an identifier or a bare `%` (MOD token) wildcard.
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # Parses CHECKSUM = ON|OFF|DEFAULT; `on` stays None when unspecified.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        # CLUSTER BY expressions, optionally parenthesized.
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # Hive-style CLUSTERED BY (...) [SORTED BY (...)] INTO n BUCKETS.
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY must be followed by GRANTS; otherwise give the token back.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # The flags are matched in sequence; at most one is expected to be set.
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        # [NO] [CONCURRENT] ISOLATED LOADING [option]; rewinds fully on failure.
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # Teradata LOCKING clause: kind, optional target, FOR/IN, lock type.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        # Returns an empty list (not None) when there is no PARTITION BY.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        # Postgres partition bounds: IN (...), FROM (...) TO (...), or
        # WITH (MODULUS n, REMAINDER m).
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        # PARTITION OF parent {DEFAULT | FOR VALUES <bound spec>}.
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]; statistics stays None if absent.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        # ON COMMIT PRESERVE|DELETE ROWS, or a generic ON <schema> property.
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        # CREATE TABLE ... LIKE other [INCLUDING|EXCLUDING option ...].
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # Parses a UDF RETURNS clause: TABLE<...>, TABLE (schema), NULL ON NULL
        # INPUT, or a plain type.
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        # A following DOT means the "style" token was actually the first part
        # of a dotted table name — rewind both tokens and drop the style.
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        # Oracle-style INSERT ALL/FIRST with a list of [WHEN ... THEN] INTO
        # branches followed by a source query.
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        # Parses INSERT in its many dialect variants (OVERWRITE/IGNORE, INSERT
        # INTO DIRECTORY, multitable inserts, INSERT OR <alternative>, ...).
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        # NOTE: keyword-argument order matters — the matchers below consume
        # tokens left to right as the arguments are evaluated.
        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        # Handles both Postgres ON CONFLICT and MySQL ON DUPLICATE KEY.
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        # Only the UPDATE action carries a SET assignment list.
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        # [WITH] SERDEPROPERTIES (...); rewinds entirely if the keyword is absent.
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # Hive ROW FORMAT: either SERDE '<class>' or DELIMITED with per-part
        # terminator clauses.
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
kwargs["escaped"] = self._parse_string() 2902 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2903 kwargs["collection_items"] = self._parse_string() 2904 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2905 kwargs["map_keys"] = self._parse_string() 2906 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2907 kwargs["lines"] = self._parse_string() 2908 if self._match_text_seq("NULL", "DEFINED", "AS"): 2909 kwargs["null"] = self._parse_string() 2910 2911 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2912 2913 def _parse_load(self) -> exp.LoadData | exp.Command: 2914 if self._match_text_seq("DATA"): 2915 local = self._match_text_seq("LOCAL") 2916 self._match_text_seq("INPATH") 2917 inpath = self._parse_string() 2918 overwrite = self._match(TokenType.OVERWRITE) 2919 self._match_pair(TokenType.INTO, TokenType.TABLE) 2920 2921 return self.expression( 2922 exp.LoadData, 2923 this=self._parse_table(schema=True), 2924 local=local, 2925 overwrite=overwrite, 2926 inpath=inpath, 2927 partition=self._parse_partition(), 2928 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2929 serde=self._match_text_seq("SERDE") and self._parse_string(), 2930 ) 2931 return self._parse_as_command(self._prev) 2932 2933 def _parse_delete(self) -> exp.Delete: 2934 # This handles MySQL's "Multiple-Table Syntax" 2935 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2936 tables = None 2937 if not self._match(TokenType.FROM, advance=False): 2938 tables = self._parse_csv(self._parse_table) or None 2939 2940 returning = self._parse_returning() 2941 2942 return self.expression( 2943 exp.Delete, 2944 tables=tables, 2945 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2946 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2947 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2948 where=self._parse_where(), 2949 returning=returning or 
self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION/SUBPARTITION (<assignments>); None if neither keyword follows."""
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse a single VALUES row: either a parenthesized tuple or a bare expression."""

        def _parse_value_expression() -> t.Optional[exp.Expression]:
            # DEFAULT as a row value is only accepted when the dialect allows it.
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (overridable hook for dialects)."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a query: SELECT, parenthesized subquery/pivot, VALUES, leading
        FROM (duckdb), SUMMARIZE, DESCRIBE or STREAM — whichever matches next.

        Returns None when no query-like construct follows.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # A following DOT means SELECT is being used as an identifier, not a keyword
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # e.g. BigQuery-style SELECT AS STRUCT / AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
                this = self._parse_simplified_pivot(
                    is_unpivot=self._prev.token_type == TokenType.UNPIVOT
                )
            elif self._match(TokenType.FROM):
                from_ = self._parse_from(skip_from_token=True)
                # Support parentheses for duckdb FROM-first syntax
                select = self._parse_select()
                if select:
                    select.set("from", from_)
                    this = select
                else:
                    this = exp.select("*").from_(t.cast(exp.From, from_))
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g.
comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse one CTE: <alias> [NOT MATERIALIZED | MATERIALIZED] AS (<statement>).

        Retreats and returns None when AS is missing and the dialect does not
        allow an optional alias token.
        """
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        # A bare VALUES CTE body is normalized into SELECT * FROM (VALUES ...) AS _values
        if isinstance(cte.this, exp.Values):
            cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(col, ...)] into exp.TableAlias, or None."""
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap an already-parsed query in exp.Subquery with pivots/alias/sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite unqualified comma-join references to earlier sources into
        explicit UNNEST nodes (used when SUPPORTS_IMPLICIT_UNNEST is set).
        """
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, and whatever
        QUERY_MODIFIER_PARSERS dispatches on the next token) to a query/table.
        """
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT ... BY parses an offset inside the Limit node;
                            # hoist it into a proper Offset modifier here.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)

                    continue

                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        """Consume the remaining tokens and wrap them as a single string hint."""
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        """Hook for dialects to customize function-call parsing inside hints."""
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        """Parse the body of a hint comment; falls back to a raw string on error
        or if tokens remain unconsumed.
        """
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint carried in the comment attached to a HINT token."""
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint,
dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse INTO [TEMPORARY | UNLOGGED] [TABLE] <table> (SELECT ... INTO)."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse FROM <table> (optionally with its joins)."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES item: [FINAL | RUNNING] <expression>."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # ROWS PER MATCH variants are preserved verbatim as exp.var text
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is a regex-like mini-language; capture it as raw SQL
            # by scanning tokens until the parentheses balance out.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL [VIEW [OUTER]] ..., CROSS APPLY or OUTER APPLY.

        ``cross_apply`` is True for CROSS APPLY, False for OUTER APPLY and
        None for plain LATERAL.
        """
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume the optional (method, side, kind) tokens that prefix JOIN."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse USING (<cols>), unwrapping parsed Columns down to identifiers."""

        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause, including comma joins and CROSS/OUTER APPLY."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword actually followed: undo the speculative matches
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # Support nested joins: the ON/USING may belong to this join only
            # after further joins of the right-hand table are parsed.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins
else None) 3620 3621 comments = [c for token in (method, side, kind) if token for c in token.comments] 3622 return self.expression(exp.Join, comments=comments, **kwargs) 3623 3624 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3625 this = self._parse_assignment() 3626 3627 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3628 return this 3629 3630 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3631 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3632 3633 return this 3634 3635 def _parse_index_params(self) -> exp.IndexParameters: 3636 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3637 3638 if self._match(TokenType.L_PAREN, advance=False): 3639 columns = self._parse_wrapped_csv(self._parse_with_operator) 3640 else: 3641 columns = None 3642 3643 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3644 partition_by = self._parse_partition_by() 3645 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3646 tablespace = ( 3647 self._parse_var(any_token=True) 3648 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3649 else None 3650 ) 3651 where = self._parse_where() 3652 3653 on = self._parse_field() if self._match(TokenType.ON) else None 3654 3655 return self.expression( 3656 exp.IndexParameters, 3657 using=using, 3658 columns=columns, 3659 include=include, 3660 partition_by=partition_by, 3661 where=where, 3662 with_storage=with_storage, 3663 tablespace=tablespace, 3664 on=on, 3665 ) 3666 3667 def _parse_index( 3668 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3669 ) -> t.Optional[exp.Index]: 3670 if index or anonymous: 3671 unique = None 3672 primary = None 3673 amp = None 3674 3675 self._match(TokenType.ON) 3676 self._match(TokenType.TABLE) # hive 3677 table = self._parse_table_parts(schema=True) 3678 else: 3679 unique = self._match(TokenType.UNIQUE) 3680 
primary = self._match_text_seq("PRIMARY") 3681 amp = self._match_text_seq("AMP") 3682 3683 if not self._match(TokenType.INDEX): 3684 return None 3685 3686 index = self._parse_id_var() 3687 table = None 3688 3689 params = self._parse_index_params() 3690 3691 return self.expression( 3692 exp.Index, 3693 this=index, 3694 table=table, 3695 unique=unique, 3696 primary=primary, 3697 amp=amp, 3698 params=params, 3699 ) 3700 3701 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3702 hints: t.List[exp.Expression] = [] 3703 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3704 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3705 hints.append( 3706 self.expression( 3707 exp.WithTableHint, 3708 expressions=self._parse_csv( 3709 lambda: self._parse_function() or self._parse_var(any_token=True) 3710 ), 3711 ) 3712 ) 3713 self._match_r_paren() 3714 else: 3715 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3716 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3717 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3718 3719 self._match_set((TokenType.INDEX, TokenType.KEY)) 3720 if self._match(TokenType.FOR): 3721 hint.set("target", self._advance_any() and self._prev.text.upper()) 3722 3723 hint.set("expressions", self._parse_wrapped_id_vars()) 3724 hints.append(hint) 3725 3726 return hints or None 3727 3728 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3729 return ( 3730 (not schema and self._parse_function(optional_parens=False)) 3731 or self._parse_id_var(any_token=False) 3732 or self._parse_string_as_identifier() 3733 or self._parse_placeholder() 3734 ) 3735 3736 def _parse_table_parts( 3737 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3738 ) -> exp.Table: 3739 catalog = None 3740 db = None 3741 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3742 3743 while 
self._match(TokenType.DOT): 3744 if catalog: 3745 # This allows nesting the table in arbitrarily many dot expressions if needed 3746 table = self.expression( 3747 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3748 ) 3749 else: 3750 catalog = db 3751 db = table 3752 # "" used for tsql FROM a..b case 3753 table = self._parse_table_part(schema=schema) or "" 3754 3755 if ( 3756 wildcard 3757 and self._is_connected() 3758 and (isinstance(table, exp.Identifier) or not table) 3759 and self._match(TokenType.STAR) 3760 ): 3761 if isinstance(table, exp.Identifier): 3762 table.args["this"] += "*" 3763 else: 3764 table = exp.Identifier(this="*") 3765 3766 # We bubble up comments from the Identifier to the Table 3767 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3768 3769 if is_db_reference: 3770 catalog = db 3771 db = table 3772 table = None 3773 3774 if not table and not is_db_reference: 3775 self.raise_error(f"Expected table name but got {self._curr}") 3776 if not db and is_db_reference: 3777 self.raise_error(f"Expected database name but got {self._curr}") 3778 3779 table = self.expression( 3780 exp.Table, 3781 comments=comments, 3782 this=table, 3783 db=db, 3784 catalog=catalog, 3785 ) 3786 3787 changes = self._parse_changes() 3788 if changes: 3789 table.set("changes", changes) 3790 3791 at_before = self._parse_historical_data() 3792 if at_before: 3793 table.set("when", at_before) 3794 3795 pivots = self._parse_pivots() 3796 if pivots: 3797 table.set("pivots", pivots) 3798 3799 return table 3800 3801 def _parse_table( 3802 self, 3803 schema: bool = False, 3804 joins: bool = False, 3805 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3806 parse_bracket: bool = False, 3807 is_db_reference: bool = False, 3808 parse_partition: bool = False, 3809 ) -> t.Optional[exp.Expression]: 3810 lateral = self._parse_lateral() 3811 if lateral: 3812 return lateral 3813 3814 unnest = self._parse_unnest() 3815 if unnest: 3816 
return unnest 3817 3818 values = self._parse_derived_table_values() 3819 if values: 3820 return values 3821 3822 subquery = self._parse_select(table=True) 3823 if subquery: 3824 if not subquery.args.get("pivots"): 3825 subquery.set("pivots", self._parse_pivots()) 3826 return subquery 3827 3828 bracket = parse_bracket and self._parse_bracket(None) 3829 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3830 3831 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3832 self._parse_table 3833 ) 3834 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3835 3836 only = self._match(TokenType.ONLY) 3837 3838 this = t.cast( 3839 exp.Expression, 3840 bracket 3841 or rows_from 3842 or self._parse_bracket( 3843 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3844 ), 3845 ) 3846 3847 if only: 3848 this.set("only", only) 3849 3850 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3851 self._match_text_seq("*") 3852 3853 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3854 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3855 this.set("partition", self._parse_partition()) 3856 3857 if schema: 3858 return self._parse_schema(this=this) 3859 3860 version = self._parse_version() 3861 3862 if version: 3863 this.set("version", version) 3864 3865 if self.dialect.ALIAS_POST_TABLESAMPLE: 3866 this.set("sample", self._parse_table_sample()) 3867 3868 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3869 if alias: 3870 this.set("alias", alias) 3871 3872 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3873 return self.expression( 3874 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3875 ) 3876 3877 this.set("hints", self._parse_table_hints()) 3878 3879 if not this.args.get("pivots"): 3880 this.set("pivots", 
self._parse_pivots()) 3881 3882 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3883 this.set("sample", self._parse_table_sample()) 3884 3885 if joins: 3886 for join in self._parse_joins(): 3887 this.append("joins", join) 3888 3889 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3890 this.set("ordinality", True) 3891 this.set("alias", self._parse_table_alias()) 3892 3893 return this 3894 3895 def _parse_version(self) -> t.Optional[exp.Version]: 3896 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3897 this = "TIMESTAMP" 3898 elif self._match(TokenType.VERSION_SNAPSHOT): 3899 this = "VERSION" 3900 else: 3901 return None 3902 3903 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3904 kind = self._prev.text.upper() 3905 start = self._parse_bitwise() 3906 self._match_texts(("TO", "AND")) 3907 end = self._parse_bitwise() 3908 expression: t.Optional[exp.Expression] = self.expression( 3909 exp.Tuple, expressions=[start, end] 3910 ) 3911 elif self._match_text_seq("CONTAINED", "IN"): 3912 kind = "CONTAINED IN" 3913 expression = self.expression( 3914 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3915 ) 3916 elif self._match(TokenType.ALL): 3917 kind = "ALL" 3918 expression = None 3919 else: 3920 self._match_text_seq("AS", "OF") 3921 kind = "AS OF" 3922 expression = self._parse_type() 3923 3924 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3925 3926 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3927 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3928 index = self._index 3929 historical_data = None 3930 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3931 this = self._prev.text.upper() 3932 kind = ( 3933 self._match(TokenType.L_PAREN) 3934 and self._match_texts(self.HISTORICAL_DATA_KIND) 3935 and self._prev.text.upper() 3936 ) 3937 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3938 3939 if expression: 3940 self._match_r_paren() 3941 
def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
    """Parse an UNNEST(...) table expression into an exp.Unnest.

    Args:
        with_alias: whether to also consume a trailing table alias.

    Returns None if the next token is not UNNEST.
    """
    if not self._match(TokenType.UNNEST):
        return None

    expressions = self._parse_wrapped_csv(self._parse_equality)
    # WITH ORDINALITY adds an implicit position column; recorded as `offset`
    offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

    alias = self._parse_table_alias() if with_alias else None

    if alias:
        if self.dialect.UNNEST_COLUMN_ONLY:
            # In these dialects the alias names the produced column, not the
            # table, so move it from `this` into `columns`
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        columns = alias.args.get("columns") or []
        if offset and len(expressions) < len(columns):
            # More column aliases than unnested expressions: the last alias
            # names the ordinality column itself
            offset = columns.pop()

    if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
        # BigQuery-style WITH OFFSET [AS <name>]; defaults to "offset"
        self._match(TokenType.ALIAS)
        offset = self._parse_id_var(
            any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
        ) or exp.to_identifier("offset")

    return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
    """Parse a TABLESAMPLE clause into an exp.TableSample.

    Args:
        as_modifier: also accept the `USING SAMPLE` spelling used as a
            query modifier (e.g. DuckDB).

    Returns None when no sampling clause is present. Supports Hive's
    BUCKET n OUT OF m [ON field], percentage and row-count forms, and an
    optional sampling method plus SEED/REPEATABLE seed.
    """
    if not self._match(TokenType.TABLE_SAMPLE) and not (
        as_modifier and self._match_text_seq("USING", "SAMPLE")
    ):
        return None

    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    size = None
    seed = None

    method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
    matched_l_paren = self._match(TokenType.L_PAREN)

    if self.TABLESAMPLE_CSV:
        num = None
        expressions = self._parse_csv(self._parse_primary)
    else:
        expressions = None
        num = (
            self._parse_factor()
            if self._match(TokenType.NUMBER, advance=False)
            else self._parse_primary() or self._parse_placeholder()
        )

    if self._match_text_seq("BUCKET"):
        # Hive-style: BUCKET <numerator> OUT OF <denominator> [ON <field>]
        bucket_numerator = self._parse_number()
        self._match_text_seq("OUT", "OF")
        # Was a duplicated assignment (`x = x = ...`) in the original
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
        percent = num
    elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
        size = num
    else:
        percent = num

    if matched_l_paren:
        self._match_r_paren()

    # Optional trailing method/seed: (METHOD[, seed]) or SEED/REPEATABLE(n)
    if self._match(TokenType.L_PAREN):
        method = self._parse_var(upper=True)
        seed = self._match(TokenType.COMMA) and self._parse_number()
        self._match_r_paren()
    elif self._match_texts(("SEED", "REPEATABLE")):
        seed = self._parse_wrapped(self._parse_number)

    if not method and self.DEFAULT_SAMPLING_METHOD:
        method = exp.var(self.DEFAULT_SAMPLING_METHOD)

    return self.expression(
        exp.TableSample,
        expressions=expressions,
        method=method,
        bucket_numerator=bucket_numerator,
        bucket_denominator=bucket_denominator,
        bucket_field=bucket_field,
        percent=percent,
        size=size,
        seed=seed,
    )
def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
    """Parse the `<col> IN (<value list>)` portion of a PIVOT clause.

    Raises a parse error if `IN (` does not follow the pivoted column.
    """

    def _aliased_pivot_value() -> t.Optional[exp.Expression]:
        expr = self._parse_select_or_expression()
        self._match(TokenType.ALIAS)

        pivot_alias = self._parse_bitwise()
        if not pivot_alias:
            return expr

        # Reduce an unqualified column alias to its bare identifier
        if isinstance(pivot_alias, exp.Column) and not pivot_alias.db:
            pivot_alias = pivot_alias.this

        return self.expression(exp.PivotAlias, this=expr, alias=pivot_alias)

    value = self._parse_column()

    if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
        self.raise_error("Expecting IN (")

    if self._match(TokenType.ANY):
        # `IN (ANY [ORDER BY ...])` form
        in_values: t.List[exp.Expression] = ensure_list(
            exp.PivotAny(this=self._parse_order())
        )
    else:
        in_values = self._parse_csv(_aliased_pivot_value)

    self._match_r_paren()
    return self.expression(exp.In, this=value, expressions=in_values)
4166 expressions = [] 4167 4168 if not self._match(TokenType.L_PAREN): 4169 self._retreat(index) 4170 return None 4171 4172 if unpivot: 4173 expressions = self._parse_csv(self._parse_column) 4174 else: 4175 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4176 4177 if not expressions: 4178 self.raise_error("Failed to parse PIVOT's aggregation list") 4179 4180 if not self._match(TokenType.FOR): 4181 self.raise_error("Expecting FOR") 4182 4183 field = self._parse_pivot_in() 4184 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4185 self._parse_bitwise 4186 ) 4187 4188 self._match_r_paren() 4189 4190 pivot = self.expression( 4191 exp.Pivot, 4192 expressions=expressions, 4193 field=field, 4194 unpivot=unpivot, 4195 include_nulls=include_nulls, 4196 default_on_null=default_on_null, 4197 ) 4198 4199 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4200 pivot.set("alias", self._parse_table_alias()) 4201 4202 if not unpivot: 4203 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4204 4205 columns: t.List[exp.Expression] = [] 4206 for fld in pivot.args["field"].expressions: 4207 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4208 for name in names: 4209 if self.PREFIXED_PIVOT_COLUMNS: 4210 name = f"{name}_{field_name}" if name else field_name 4211 else: 4212 name = f"{field_name}_{name}" if name else field_name 4213 4214 columns.append(exp.to_identifier(name)) 4215 4216 pivot.set("columns", columns) 4217 4218 return pivot 4219 4220 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4221 return [agg.alias for agg in aggregations] 4222 4223 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4224 if not skip_where_token and not self._match(TokenType.PREWHERE): 4225 return None 4226 4227 return self.expression( 4228 exp.PreWhere, 
def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
    """Parse a GROUP BY clause into an exp.Group.

    Collects plain grouping expressions plus ROLLUP / CUBE / GROUPING SETS
    and WITH TOTALS modifiers, looping until no more progress is made.

    Args:
        skip_group_by_token: assume GROUP BY was already consumed.
    """
    if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
        return None

    elements: t.Dict[str, t.Any] = defaultdict(list)

    # GROUP BY ALL / GROUP BY DISTINCT prefixes
    if self._match(TokenType.ALL):
        elements["all"] = True
    elif self._match(TokenType.DISTINCT):
        elements["all"] = False

    while True:
        index = self._index

        # Plain expressions; stop short of CUBE/ROLLUP so they are treated
        # as modifiers below rather than as column references
        elements["expressions"].extend(
            self._parse_csv(
                lambda: None
                if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                else self._parse_assignment()
            )
        )

        before_with_index = self._index
        with_prefix = self._match(TokenType.WITH)

        if self._match(TokenType.ROLLUP):
            elements["rollup"].append(
                self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
            )
        elif self._match(TokenType.CUBE):
            elements["cube"].append(
                self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
            )
        elif self._match(TokenType.GROUPING_SETS):
            elements["grouping_sets"].append(
                self.expression(
                    exp.GroupingSets,
                    expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                )
            )
        elif self._match_text_seq("TOTALS"):
            elements["totals"] = True  # type: ignore

        # Consumed at most a dangling WITH with no modifier following it:
        # give the WITH token back and stop
        if before_with_index <= self._index <= before_with_index + 1:
            self._retreat(before_with_index)
            break

        # No tokens consumed this iteration -> done
        if index == self._index:
            break

    return self.expression(exp.Group, **elements)  # type: ignore
def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
    """Parse hierarchical-query clauses (START WITH / CONNECT BY) into exp.Connect.

    Args:
        skip_start_token: assume the START WITH keyword was already handled.

    Returns None when neither clause is present. START WITH may appear
    either before or after CONNECT BY.
    """
    if skip_start_token:
        start = None
    elif self._match(TokenType.START_WITH):
        start = self._parse_assignment()
    else:
        return None

    self._match(TokenType.CONNECT_BY)
    nocycle = self._match_text_seq("NOCYCLE")

    # PRIOR is only meaningful inside CONNECT BY, so register its parser
    # temporarily; try/finally guarantees it is removed even if parsing the
    # condition raises, keeping the parser instance reusable afterwards.
    self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
        exp.Prior, this=self._parse_bitwise()
    )
    try:
        connect = self._parse_assignment()
    finally:
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

    # START WITH may also trail the CONNECT BY clause
    if not start and self._match(TokenType.START_WITH):
        start = self._parse_assignment()

    return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)
self._match_text_seq("INTERPOLATE"): 4343 return self._parse_wrapped_csv(self._parse_name_as_expression) 4344 return None 4345 4346 def _parse_order( 4347 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4348 ) -> t.Optional[exp.Expression]: 4349 siblings = None 4350 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4351 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4352 return this 4353 4354 siblings = True 4355 4356 return self.expression( 4357 exp.Order, 4358 this=this, 4359 expressions=self._parse_csv(self._parse_ordered), 4360 siblings=siblings, 4361 ) 4362 4363 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4364 if not self._match(token): 4365 return None 4366 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4367 4368 def _parse_ordered( 4369 self, parse_method: t.Optional[t.Callable] = None 4370 ) -> t.Optional[exp.Ordered]: 4371 this = parse_method() if parse_method else self._parse_assignment() 4372 if not this: 4373 return None 4374 4375 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4376 this = exp.var("ALL") 4377 4378 asc = self._match(TokenType.ASC) 4379 desc = self._match(TokenType.DESC) or (asc and False) 4380 4381 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4382 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4383 4384 nulls_first = is_nulls_first or False 4385 explicitly_null_ordered = is_nulls_first or is_nulls_last 4386 4387 if ( 4388 not explicitly_null_ordered 4389 and ( 4390 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4391 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4392 ) 4393 and self.dialect.NULL_ORDERING != "nulls_are_last" 4394 ): 4395 nulls_first = True 4396 4397 if self._match_text_seq("WITH", "FILL"): 4398 with_fill = self.expression( 4399 exp.WithFill, 4400 **{ # type: ignore 4401 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 
def _parse_limit(
    self,
    this: t.Optional[exp.Expression] = None,
    top: bool = False,
    skip_limit_token: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a LIMIT / TOP clause, or an ANSI FETCH clause.

    Args:
        this: the expression the limit applies to (stored as `this`).
        top: parse T-SQL style TOP instead of LIMIT.
        skip_limit_token: assume the LIMIT/TOP keyword was already consumed.

    Returns an exp.Limit or exp.Fetch, or `this` unchanged when neither
    clause is present.
    """
    if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
        comments = self._prev_comments
        if top:
            # TOP accepts an optional parenthesized expression: TOP (10)
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_term() if limit_paren else self._parse_number()

            if limit_paren:
                self._match_r_paren()
        else:
            expression = self._parse_term()

        if self._match(TokenType.COMMA):
            # LIMIT <offset>, <count> form: the first value was the offset
            offset = expression
            expression = self._parse_term()
        else:
            offset = None

        limit_exp = self.expression(
            exp.Limit,
            this=this,
            expression=expression,
            offset=offset,
            comments=comments,
            expressions=self._parse_limit_by(),
        )

        return limit_exp

    if self._match(TokenType.FETCH):
        # ANSI: FETCH {FIRST|NEXT} <count> [PERCENT] {ROW|ROWS} {ONLY|WITH TIES}
        direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
        direction = self._prev.text.upper() if direction else "FIRST"

        count = self._parse_field(tokens=self.FETCH_TOKENS)
        percent = self._match(TokenType.PERCENT)

        self._match_set((TokenType.ROW, TokenType.ROWS))

        only = self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")

        if only and with_ties:
            self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

        return self.expression(
            exp.Fetch,
            direction=direction,
            count=count,
            percent=percent,
            with_ties=with_ties,
        )

    return this
def _parse_locks(self) -> t.List[exp.Lock]:
    """Consume any number of trailing row-locking clauses.

    Recognizes FOR UPDATE, FOR SHARE and LOCK IN SHARE MODE, each with an
    optional `OF <tables>` list and NOWAIT / WAIT <n> / SKIP LOCKED option.
    """
    lock_exprs: t.List[exp.Lock] = []

    while True:
        if self._match_text_seq("FOR", "UPDATE"):
            is_update = True
        elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
            "LOCK", "IN", "SHARE", "MODE"
        ):
            is_update = False
        else:
            break

        # Optional list of tables the lock is scoped to: OF t1, t2
        tables = None
        if self._match_text_seq("OF"):
            tables = self._parse_csv(lambda: self._parse_table(schema=True))

        # NOWAIT -> True, WAIT <n> -> parsed expression, SKIP LOCKED -> False
        wait: t.Optional[bool | exp.Expression] = None
        if self._match_text_seq("NOWAIT"):
            wait = True
        elif self._match_text_seq("WAIT"):
            wait = self._parse_primary()
        elif self._match_text_seq("SKIP", "LOCKED"):
            wait = False

        lock_exprs.append(
            self.expression(exp.Lock, update=is_update, expressions=tables, wait=wait)
        )

    return lock_exprs
def _parse_assignment(self) -> t.Optional[exp.Expression]:
    """Parse an assignment-precedence expression (e.g. `x := <expr>`).

    Recurses on the right-hand side, so chained assignments associate to
    the right. Falls through to a plain disjunction when no assignment
    operator follows.
    """
    this = self._parse_disjunction()
    if not this and self._next and self._next.token_type in self.ASSIGNMENT:
        # This allows us to parse <non-identifier token> := <expr>
        this = exp.column(
            t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
        )

    while self._match_set(self.ASSIGNMENT):
        # A single-part column on the left is really a bare identifier
        if isinstance(this, exp.Column) and len(this.parts) == 1:
            this = this.this

        this = self.expression(
            self.ASSIGNMENT[self._prev.token_type],
            this=this,
            comments=self._prev_comments,
            expression=self._parse_assignment(),
        )

    return this
def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
    """Parse range-style predicates that may follow an expression.

    Dispatches on RANGE_PARSERS (IN, BETWEEN, LIKE, ...), handles an
    optional leading NOT, and Postgres' ISNULL / NOTNULL shorthands.
    """
    this = this or self._parse_bitwise()
    negate = self._match(TokenType.NOT)

    if self._match_set(self.RANGE_PARSERS):
        expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
        if not expression:
            # The range parser backed out; keep the original expression
            return this

        this = expression
    elif self._match(TokenType.ISNULL):
        this = self.expression(exp.Is, this=this, expression=exp.Null())

    # Postgres supports ISNULL and NOTNULL for conditions.
    # https://blog.andreiavram.ro/postgresql-null-composite-type/
    if self._match(TokenType.NOTNULL):
        # NOTNULL is equivalent to NOT (<expr> IS NULL)
        this = self.expression(exp.Is, this=this, expression=exp.Null())
        this = self.expression(exp.Not, this=this)

    if negate:
        this = self._negate_range(this)

    if self._match(TokenType.IS):
        this = self._parse_is(this)

    return this
def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
    """Parse an INTERVAL expression into the canonical `'<value>' <unit>` form.

    Args:
        match_interval: require the INTERVAL keyword (False when parsing the
            continuation of a sum of intervals).

    Returns an exp.Interval, an exp.Add chaining consecutive intervals, or
    None with the token position restored when no interval is found.
    """
    index = self._index

    if not self._match(TokenType.INTERVAL) and match_interval:
        return None

    if self._match(TokenType.STRING, advance=False):
        this = self._parse_primary()
    else:
        this = self._parse_term()

    # Bail out if nothing was parsed, or if we grabbed a bare "IS" keyword
    # (which belongs to a following IS predicate, not the interval)
    if not this or (
        isinstance(this, exp.Column)
        and not this.table
        and not this.this.quoted
        and this.name.upper() == "IS"
    ):
        self._retreat(index)
        return None

    unit = self._parse_function() or (
        not self._match(TokenType.ALIAS, advance=False)
        and self._parse_var(any_token=True, upper=True)
    )

    # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
    # each INTERVAL expression into this canonical form so it's easy to transpile
    if this and this.is_number:
        this = exp.Literal.string(this.to_py())
    elif this and this.is_string:
        parts = exp.INTERVAL_STRING_RE.findall(this.name)
        if parts and unit:
            # Unconsume the eagerly-parsed unit, since the real unit was part of the string
            unit = None
            self._retreat(self._index - 1)

        if len(parts) == 1:
            this = exp.Literal.string(parts[0][0])
            unit = self.expression(exp.Var, this=parts[0][1].upper())

    # e.g. INTERVAL '1' DAY TO '2' HOUR span syntax
    if self.INTERVAL_SPANS and self._match_text_seq("TO"):
        unit = self.expression(
            exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
        )

    interval = self.expression(exp.Interval, this=this, unit=unit)

    index = self._index
    self._match(TokenType.PLUS)

    # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
    if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
        return self.expression(
            exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
        )

    self._retreat(index)
    return interval
def _parse_factor(self) -> t.Optional[exp.Expression]:
    """Parse multiplicative-precedence operators into a left-associative tree."""
    parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
    this = parse_method()

    while self._match_set(self.FACTOR):
        klass = self.FACTOR[self._prev.token_type]
        comments = self._prev_comments
        expression = parse_method()

        # A word operator (e.g. DIV) with no right operand was actually an
        # identifier/alias, not the operator - back off one token
        if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
            self._retreat(self._index - 1)
            return this

        this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            # Record the dialect's division semantics so transpilation can
            # adjust between typed/untyped and safe/unsafe division
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

    return this
def _parse_type(
    self, parse_interval: bool = True, fallback_to_identifier: bool = False
) -> t.Optional[exp.Expression]:
    """Parse an expression that may begin with a type.

    Handles INTERVAL literals, type-prefixed casts (e.g. DECIMAL(38, 0) '1'
    or BigQuery struct constructors), and otherwise falls back to a column
    or - when `fallback_to_identifier` is set - a bare identifier.
    """
    interval = parse_interval and self._parse_interval()
    if interval:
        return interval

    index = self._index
    data_type = self._parse_types(check_func=True, allow_identifiers=False)

    # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
    # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
    if isinstance(data_type, exp.Cast):
        # This constructor can contain ops directly after it, for instance struct unnesting:
        # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
        return self._parse_column_ops(data_type)

    if data_type:
        index2 = self._index
        this = self._parse_primary()

        if isinstance(this, exp.Literal):
            # e.g. DATE '2020-01-01': a dialect may supply a dedicated
            # parser for the <type> <literal> form
            parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
            if parser:
                return parser(self, this, data_type)

            return self.expression(exp.Cast, this=this, to=data_type)

        # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
        # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
        #
        # If the index difference here is greater than 1, that means the parser itself must have
        # consumed additional tokens such as the DECIMAL scale and precision in the above example.
        #
        # If it's not greater than 1, then it must be 1, because we've consumed at least the type
        # keyword, meaning that the expressions arg of the DataType must have gotten set by a
        # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
        # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
        #
        # In these cases, we don't really want to return the converted type, but instead retreat
        # and try to parse a Column or Identifier in the section below.
        if data_type.expressions and index2 - index > 1:
            self._retreat(index2)
            return self._parse_column_ops(data_type)

        self._retreat(index)

    if fallback_to_identifier:
        return self._parse_id_var()

    this = self._parse_column()
    return this and self._parse_column_ops(this)
self._retreat(self._index - 1) 4945 return None 4946 else: 4947 return None 4948 4949 type_token = self._prev.token_type 4950 4951 if type_token == TokenType.PSEUDO_TYPE: 4952 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4953 4954 if type_token == TokenType.OBJECT_IDENTIFIER: 4955 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4956 4957 # https://materialize.com/docs/sql/types/map/ 4958 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4959 key_type = self._parse_types( 4960 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4961 ) 4962 if not self._match(TokenType.FARROW): 4963 self._retreat(index) 4964 return None 4965 4966 value_type = self._parse_types( 4967 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4968 ) 4969 if not self._match(TokenType.R_BRACKET): 4970 self._retreat(index) 4971 return None 4972 4973 return exp.DataType( 4974 this=exp.DataType.Type.MAP, 4975 expressions=[key_type, value_type], 4976 nested=True, 4977 prefix=prefix, 4978 ) 4979 4980 nested = type_token in self.NESTED_TYPE_TOKENS 4981 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4982 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4983 expressions = None 4984 maybe_func = False 4985 4986 if self._match(TokenType.L_PAREN): 4987 if is_struct: 4988 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4989 elif nested: 4990 expressions = self._parse_csv( 4991 lambda: self._parse_types( 4992 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4993 ) 4994 ) 4995 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4996 this = expressions[0] 4997 this.set("nullable", True) 4998 self._match_r_paren() 4999 return this 5000 elif type_token in self.ENUM_TYPE_TOKENS: 5001 expressions = self._parse_csv(self._parse_equality) 5002 elif is_aggregate: 5003 func_or_ident = self._parse_function(anonymous=True) or 
self._parse_id_var( 5004 any_token=False, tokens=(TokenType.VAR,) 5005 ) 5006 if not func_or_ident or not self._match(TokenType.COMMA): 5007 return None 5008 expressions = self._parse_csv( 5009 lambda: self._parse_types( 5010 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5011 ) 5012 ) 5013 expressions.insert(0, func_or_ident) 5014 else: 5015 expressions = self._parse_csv(self._parse_type_size) 5016 5017 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5018 if type_token == TokenType.VECTOR and len(expressions) == 2: 5019 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5020 5021 if not expressions or not self._match(TokenType.R_PAREN): 5022 self._retreat(index) 5023 return None 5024 5025 maybe_func = True 5026 5027 values: t.Optional[t.List[exp.Expression]] = None 5028 5029 if nested and self._match(TokenType.LT): 5030 if is_struct: 5031 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5032 else: 5033 expressions = self._parse_csv( 5034 lambda: self._parse_types( 5035 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5036 ) 5037 ) 5038 5039 if not self._match(TokenType.GT): 5040 self.raise_error("Expecting >") 5041 5042 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5043 values = self._parse_csv(self._parse_assignment) 5044 if not values and is_struct: 5045 values = None 5046 self._retreat(self._index - 1) 5047 else: 5048 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5049 5050 if type_token in self.TIMESTAMPS: 5051 if self._match_text_seq("WITH", "TIME", "ZONE"): 5052 maybe_func = False 5053 tz_type = ( 5054 exp.DataType.Type.TIMETZ 5055 if type_token in self.TIMES 5056 else exp.DataType.Type.TIMESTAMPTZ 5057 ) 5058 this = exp.DataType(this=tz_type, expressions=expressions) 5059 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5060 maybe_func = False 5061 this = 
exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5062 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5063 maybe_func = False 5064 elif type_token == TokenType.INTERVAL: 5065 unit = self._parse_var(upper=True) 5066 if unit: 5067 if self._match_text_seq("TO"): 5068 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5069 5070 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5071 else: 5072 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5073 5074 if maybe_func and check_func: 5075 index2 = self._index 5076 peek = self._parse_string() 5077 5078 if not peek: 5079 self._retreat(index) 5080 return None 5081 5082 self._retreat(index2) 5083 5084 if not this: 5085 if self._match_text_seq("UNSIGNED"): 5086 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5087 if not unsigned_type_token: 5088 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5089 5090 type_token = unsigned_type_token or type_token 5091 5092 this = exp.DataType( 5093 this=exp.DataType.Type[type_token.value], 5094 expressions=expressions, 5095 nested=nested, 5096 prefix=prefix, 5097 ) 5098 5099 # Empty arrays/structs are allowed 5100 if values is not None: 5101 cls = exp.Struct if is_struct else exp.Array 5102 this = exp.cast(cls(expressions=values), this, copy=False) 5103 5104 elif expressions: 5105 this.set("expressions", expressions) 5106 5107 # https://materialize.com/docs/sql/types/list/#type-name 5108 while self._match(TokenType.LIST): 5109 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5110 5111 index = self._index 5112 5113 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5114 matched_array = self._match(TokenType.ARRAY) 5115 5116 while self._curr: 5117 datatype_token = self._prev.token_type 5118 matched_l_bracket = self._match(TokenType.L_BRACKET) 5119 if not matched_l_bracket and not 
matched_array: 5120 break 5121 5122 matched_array = False 5123 values = self._parse_csv(self._parse_assignment) or None 5124 if ( 5125 values 5126 and not schema 5127 and ( 5128 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5129 ) 5130 ): 5131 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5132 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5133 self._retreat(index) 5134 break 5135 5136 this = exp.DataType( 5137 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5138 ) 5139 self._match(TokenType.R_BRACKET) 5140 5141 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5142 converter = self.TYPE_CONVERTERS.get(this.this) 5143 if converter: 5144 this = converter(t.cast(exp.DataType, this)) 5145 5146 return this 5147 5148 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5149 index = self._index 5150 5151 if ( 5152 self._curr 5153 and self._next 5154 and self._curr.token_type in self.TYPE_TOKENS 5155 and self._next.token_type in self.TYPE_TOKENS 5156 ): 5157 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5158 # type token. 
    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an AtTimeZone node when followed by AT TIME ZONE <expr>."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference together with any trailing column operators."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        # Oracle-style (+) outer join marker, when the dialect supports it
        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a bare column reference (a field, possibly the VALUES keyword-as-identifier)."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by "(" can be used as a plain identifier here
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `col:path.to.field` VARIANT extraction into JSONExtract."""
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # The path text ends right before the first "::" token
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

        # Re-apply the casts we peeled off the path, innermost first
        while casts:
            this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of a `::` cast, i.e. a data type."""
        return self._parse_types()
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing column operators (dots, ::, brackets, dialect extras) applied to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, (exp.Func, exp.Window)) and this:
                # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the column parts up one level: this.field becomes a deeper Column
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literals, `.N` numbers, or a parenthesized expression/query."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. .5 -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a function call, a primary expression, or an identifier.

        The order of function vs primary parsing flips depending on `anonymous_func`.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including the ODBC `{fn <function>}` escape form."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call proper.

        Args:
            functions: name -> builder mapping to use; defaults to ``self.FUNCTIONS``.
            anonymous: when True, always build an exp.Anonymous instead of a known function.
            optional_parens: whether functions callable without parentheses are allowed.
            any_token: allow (almost) any token as a function name, excluding reserved ones.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No "(" follows: only no-paren functions (e.g. CURRENT_DATE) can match
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...)
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                # Some builders accept the dialect as a keyword argument
                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        """Hook for dialects to convert a positional arg into a PropertyEQ; identity by default."""
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style function arguments into exp.PropertyEQ nodes."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                # Keys must be plain identifiers, not columns
                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed
    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        """Parse the body of a user-defined function; a full statement by default."""
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single UDF parameter as a column definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly-qualified UDF name with an optional parenthesized parameter list.

        Returns just the (dotted) name when no parameter list follows.
        """
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'x'); falls back to an Identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda argument; a plain identifier by default."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (e.g. `(x, y) -> x + y`), or fall back to a regular expression.

        Args:
            alias: whether aliases are allowed when falling back to expression parsing.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as a normal expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into an exp.Schema wrapping `this`."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a single field definition within a schema."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, computed/transform spec, constraints.

        Returns the bare name expression unchanged when neither a type nor constraints follow.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. ClickHouse's ALIAS/MATERIALIZED column clauses
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_disjunction()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT args."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; retreats when REFRESH doesn't follow AUTO."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either wrapped or bare arguments."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY(...) | ROW ... | (expr)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW {START | END} [HIDDEN] (system-versioned tables)
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (expr): a computed expression, not an identity column
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric args, e.g. IDENTITY(1, 1)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <value>."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT (NULL / CASESPECIFIC / FOR REPLICATION)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        # Don't mistake a trailing WITH <procedure option> for a constraint
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this
t.Optional[exp.Expression]: 5786 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5787 5788 procedure_option_follows = ( 5789 self._match(TokenType.WITH, advance=False) 5790 and self._next 5791 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5792 ) 5793 5794 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5795 return self.expression( 5796 exp.ColumnConstraint, 5797 this=this, 5798 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5799 ) 5800 5801 return this 5802 5803 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5804 if not self._match(TokenType.CONSTRAINT): 5805 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5806 5807 return self.expression( 5808 exp.Constraint, 5809 this=self._parse_id_var(), 5810 expressions=self._parse_unnamed_constraints(), 5811 ) 5812 5813 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5814 constraints = [] 5815 while True: 5816 constraint = self._parse_unnamed_constraint() or self._parse_function() 5817 if not constraint: 5818 break 5819 constraints.append(constraint) 5820 5821 return constraints 5822 5823 def _parse_unnamed_constraint( 5824 self, constraints: t.Optional[t.Collection[str]] = None 5825 ) -> t.Optional[exp.Expression]: 5826 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5827 constraints or self.CONSTRAINT_PARSERS 5828 ): 5829 return None 5830 5831 constraint = self._prev.text.upper() 5832 if constraint not in self.CONSTRAINT_PARSERS: 5833 self.raise_error(f"No parser found for schema constraint {constraint}.") 5834 5835 return self.CONSTRAINT_PARSERS[constraint](self) 5836 5837 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5838 return self._parse_id_var(any_token=False) 5839 5840 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5841 self._match_text_seq("KEY") 5842 return self.expression( 5843 exp.UniqueColumnConstraint, 5844 
nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5845 this=self._parse_schema(self._parse_unique_key()), 5846 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5847 on_conflict=self._parse_on_conflict(), 5848 ) 5849 5850 def _parse_key_constraint_options(self) -> t.List[str]: 5851 options = [] 5852 while True: 5853 if not self._curr: 5854 break 5855 5856 if self._match(TokenType.ON): 5857 action = None 5858 on = self._advance_any() and self._prev.text 5859 5860 if self._match_text_seq("NO", "ACTION"): 5861 action = "NO ACTION" 5862 elif self._match_text_seq("CASCADE"): 5863 action = "CASCADE" 5864 elif self._match_text_seq("RESTRICT"): 5865 action = "RESTRICT" 5866 elif self._match_pair(TokenType.SET, TokenType.NULL): 5867 action = "SET NULL" 5868 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5869 action = "SET DEFAULT" 5870 else: 5871 self.raise_error("Invalid key constraint") 5872 5873 options.append(f"ON {on} {action}") 5874 else: 5875 var = self._parse_var_from_options( 5876 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5877 ) 5878 if not var: 5879 break 5880 options.append(var.name) 5881 5882 return options 5883 5884 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5885 if match and not self._match(TokenType.REFERENCES): 5886 return None 5887 5888 expressions = None 5889 this = self._parse_table(schema=True) 5890 options = self._parse_key_constraint_options() 5891 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5892 5893 def _parse_foreign_key(self) -> exp.ForeignKey: 5894 expressions = self._parse_wrapped_id_vars() 5895 reference = self._parse_references() 5896 options = {} 5897 5898 while self._match(TokenType.ON): 5899 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5900 self.raise_error("Expected DELETE or UPDATE") 5901 5902 kind = self._prev.text.lower() 5903 5904 if self._match_text_seq("NO", "ACTION"): 5905 
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        # REFERENCES <table> [<options>]; with match=False the keyword is assumed consumed
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        # ON DELETE/UPDATE <action> clauses, stored keyed by the lowercased event
        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Fall back to consuming a single token as the action keyword
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        # PERIOD FOR SYSTEM_TIME (start_col, end_col); retreat if the snapshot
        # token does not follow, so the caller can reinterpret the input.
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # Without a parenthesized column list this is a column-level constraint
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Parses [...] (subscript/array) and {...} (map/struct/ODBC literal) suffixes
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        # `{d'...'}` / `{t'...'}` / `{ts'...'}` ODBC datetime literals
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            # Bare bracket literal, e.g. [1, 2, 3]
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            # e.g. ARRAY[...] / LIST[...] constructors registered by the dialect
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )
            # Plain subscript: normalize the index for the dialect's base offset
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Recurse to support chained subscripts, e.g. x[0][1]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        # CASE [operand] WHEN ... THEN ... [ELSE ...] END
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # `ELSE interval END` may have been parsed as an interval whose unit is "END"
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            # Function form: IF(cond, true[, false])
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            # A statement-level IF at the very start is handled as an opaque command
            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            # Keyword form: IF cond THEN true [ELSE false] END
            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        # EXTRACT(part FROM expr) — some dialects also accept a comma separator
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        # CAST(expr AS type [FORMAT fmt]); strict=False produces TryCast
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Snowflake-style CAST(expr, 'type string')
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # A FORMAT'ed cast to a temporal type is really a string-to-date/
                # time conversion, so build StrToDate/StrToTime instead of a Cast,
                # translating the format via the dialect's time mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        # Handles STRING_AGG / LISTAGG / GROUP_CONCAT-style calls across dialects
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            # CONVERT(expr USING charset)
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            # CONVERT(expr, type)
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        # Comma-separated list of `DEFAULT 'uri'` or `'uri' AS alias` entries
        namespaces = []

        while True:
            if self._match_text_seq("DEFAULT"):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired arg is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # General case: expr = search OR (expr IS NULL AND search IS NULL)
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # [KEY] <key> {:|VALUE} <value>
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            # Not a DEFAULT ... ON clause after all: rewind
            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
    def _parse_json_object(self, agg=False):
        # JSON_OBJECT(...) / JSON_OBJECTAGG(...) — shared body; `agg` picks the node
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        # MySQL full-text: MATCH (col, ...) AGAINST ('expr' [modifier])
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # <name> <type> [<path>] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        # Comma form: argument order depends on the dialect
        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        # BigQuery ML.PREDICT(MODEL <model>, TABLE <table> [, <params struct>])
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR without FROM: the start position defaults to 1
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        # TRIM([{LEADING|TRAILING|BOTH}] [chars FROM] expr) or TRIM(expr, chars)
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            # In `TRIM(chars FROM expr)` the first operand is the pattern
            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Teradata QUALIFY ... HAVING MAX/MIN <column>
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper above the aggregate
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — a reference to a named window
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS/RANGE BETWEEN <start spec> AND <end spec>
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        # explicit=True only accepts aliases introduced by AS
        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: expr AS (a, b, c)
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # Parses an identifier-like token; with any_token=True almost any
        # non-reserved token is accepted as an identifier.
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        # IDENTIFIER tokens come from quoted identifiers, hence quoted=True
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consume and return the current token unless it is reserved
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Dispatcher declined: give the token back
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        # e.g. EXCEPT/REPLACE/RENAME modifiers after SELECT *
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        # Parses a separator-delimited list, dropping None results
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comment that preceded the separator to the prior item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold: a OP b OP c => OP(OP(a, b), c)
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Runs parse_method inside (...) — the parens are required unless optional=True
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        # NOTE: definition continues beyond this chunk; reproduced up to the cut.
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else
self._parse_assignment() 6920 ) 6921 6922 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6923 return self._parse_query_modifiers( 6924 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6925 ) 6926 6927 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6928 this = None 6929 if self._match_texts(self.TRANSACTION_KIND): 6930 this = self._prev.text 6931 6932 self._match_texts(("TRANSACTION", "WORK")) 6933 6934 modes = [] 6935 while True: 6936 mode = [] 6937 while self._match(TokenType.VAR): 6938 mode.append(self._prev.text) 6939 6940 if mode: 6941 modes.append(" ".join(mode)) 6942 if not self._match(TokenType.COMMA): 6943 break 6944 6945 return self.expression(exp.Transaction, this=this, modes=modes) 6946 6947 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6948 chain = None 6949 savepoint = None 6950 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6951 6952 self._match_texts(("TRANSACTION", "WORK")) 6953 6954 if self._match_text_seq("TO"): 6955 self._match_text_seq("SAVEPOINT") 6956 savepoint = self._parse_id_var() 6957 6958 if self._match(TokenType.AND): 6959 chain = not self._match_text_seq("NO") 6960 self._match_text_seq("CHAIN") 6961 6962 if is_rollback: 6963 return self.expression(exp.Rollback, savepoint=savepoint) 6964 6965 return self.expression(exp.Commit, chain=chain) 6966 6967 def _parse_refresh(self) -> exp.Refresh: 6968 self._match(TokenType.TABLE) 6969 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6970 6971 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6972 if not self._match_text_seq("ADD"): 6973 return None 6974 6975 self._match(TokenType.COLUMN) 6976 exists_column = self._parse_exists(not_=True) 6977 expression = self._parse_field_def() 6978 6979 if expression: 6980 expression.set("exists", exists_column) 6981 6982 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 
6983 if self._match_texts(("FIRST", "AFTER")): 6984 position = self._prev.text 6985 column_position = self.expression( 6986 exp.ColumnPosition, this=self._parse_column(), position=position 6987 ) 6988 expression.set("position", column_position) 6989 6990 return expression 6991 6992 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6993 drop = self._match(TokenType.DROP) and self._parse_drop() 6994 if drop and not isinstance(drop, exp.Command): 6995 drop.set("kind", drop.args.get("kind", "COLUMN")) 6996 return drop 6997 6998 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6999 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7000 return self.expression( 7001 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7002 ) 7003 7004 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7005 index = self._index - 1 7006 7007 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7008 return self._parse_csv( 7009 lambda: self.expression( 7010 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7011 ) 7012 ) 7013 7014 self._retreat(index) 7015 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7016 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7017 7018 if self._match_text_seq("ADD", "COLUMNS"): 7019 schema = self._parse_schema() 7020 if schema: 7021 return [schema] 7022 return [] 7023 7024 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7025 7026 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7027 if self._match_texts(self.ALTER_ALTER_PARSERS): 7028 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7029 7030 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7031 # keyword after ALTER we default to parsing this statement 7032 self._match(TokenType.COLUMN) 7033 column = 
self._parse_field(any_token=True) 7034 7035 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7036 return self.expression(exp.AlterColumn, this=column, drop=True) 7037 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7038 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7039 if self._match(TokenType.COMMENT): 7040 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7041 if self._match_text_seq("DROP", "NOT", "NULL"): 7042 return self.expression( 7043 exp.AlterColumn, 7044 this=column, 7045 drop=True, 7046 allow_null=True, 7047 ) 7048 if self._match_text_seq("SET", "NOT", "NULL"): 7049 return self.expression( 7050 exp.AlterColumn, 7051 this=column, 7052 allow_null=False, 7053 ) 7054 self._match_text_seq("SET", "DATA") 7055 self._match_text_seq("TYPE") 7056 return self.expression( 7057 exp.AlterColumn, 7058 this=column, 7059 dtype=self._parse_types(), 7060 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7061 using=self._match(TokenType.USING) and self._parse_assignment(), 7062 ) 7063 7064 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7065 if self._match_texts(("ALL", "EVEN", "AUTO")): 7066 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7067 7068 self._match_text_seq("KEY", "DISTKEY") 7069 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7070 7071 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7072 if compound: 7073 self._match_text_seq("SORTKEY") 7074 7075 if self._match(TokenType.L_PAREN, advance=False): 7076 return self.expression( 7077 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7078 ) 7079 7080 self._match_texts(("AUTO", "NONE")) 7081 return self.expression( 7082 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7083 ) 7084 7085 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7086 index = 
self._index - 1 7087 7088 partition_exists = self._parse_exists() 7089 if self._match(TokenType.PARTITION, advance=False): 7090 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7091 7092 self._retreat(index) 7093 return self._parse_csv(self._parse_drop_column) 7094 7095 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7096 if self._match(TokenType.COLUMN): 7097 exists = self._parse_exists() 7098 old_column = self._parse_column() 7099 to = self._match_text_seq("TO") 7100 new_column = self._parse_column() 7101 7102 if old_column is None or to is None or new_column is None: 7103 return None 7104 7105 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7106 7107 self._match_text_seq("TO") 7108 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7109 7110 def _parse_alter_table_set(self) -> exp.AlterSet: 7111 alter_set = self.expression(exp.AlterSet) 7112 7113 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7114 "TABLE", "PROPERTIES" 7115 ): 7116 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7117 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7118 alter_set.set("expressions", [self._parse_assignment()]) 7119 elif self._match_texts(("LOGGED", "UNLOGGED")): 7120 alter_set.set("option", exp.var(self._prev.text.upper())) 7121 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7122 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7123 elif self._match_text_seq("LOCATION"): 7124 alter_set.set("location", self._parse_field()) 7125 elif self._match_text_seq("ACCESS", "METHOD"): 7126 alter_set.set("access_method", self._parse_field()) 7127 elif self._match_text_seq("TABLESPACE"): 7128 alter_set.set("tablespace", self._parse_field()) 7129 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7130 
    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement.

        Falls back to a raw exp.Command when the altered object kind is not in
        ALTERABLES, when no action parser matches, or when tokens remain
        unconsumed after the actions were parsed.
        """
        # The ALTER keyword token itself — kept so the whole statement can be
        # replayed verbatim as a Command on fallback.
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        # The token after the table name selects the action parser
        # (ADD / DROP / RENAME / SET / ...).
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only build a structured Alter node when the statement was fully
            # consumed and at least one action was recognized.
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)
"BUFFER_USAGE_LIMIT": 7190 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7191 else: 7192 options.append(self._prev.text.upper()) 7193 7194 this: t.Optional[exp.Expression] = None 7195 inner_expression: t.Optional[exp.Expression] = None 7196 7197 kind = self._curr and self._curr.text.upper() 7198 7199 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7200 this = self._parse_table_parts() 7201 elif self._match_text_seq("TABLES"): 7202 if self._match_set((TokenType.FROM, TokenType.IN)): 7203 kind = f"{kind} {self._prev.text.upper()}" 7204 this = self._parse_table(schema=True, is_db_reference=True) 7205 elif self._match_text_seq("DATABASE"): 7206 this = self._parse_table(schema=True, is_db_reference=True) 7207 elif self._match_text_seq("CLUSTER"): 7208 this = self._parse_table() 7209 # Try matching inner expr keywords before fallback to parse table. 7210 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7211 kind = None 7212 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7213 else: 7214 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7215 kind = None 7216 this = self._parse_table_parts() 7217 7218 partition = self._try_parse(self._parse_partition) 7219 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7220 return self._parse_as_command(start) 7221 7222 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7223 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7224 "WITH", "ASYNC", "MODE" 7225 ): 7226 mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE" 7227 else: 7228 mode = None 7229 7230 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7231 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7232 7233 properties = self._parse_properties() 7234 return self.expression( 7235 exp.Analyze, 7236 kind=kind, 7237 this=this, 7238 mode=mode, 7239 partition=partition, 7240 
properties=properties, 7241 expression=inner_expression, 7242 options=options, 7243 ) 7244 7245 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7246 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7247 this = None 7248 kind = self._prev.text.upper() 7249 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7250 expressions = [] 7251 7252 if not self._match_text_seq("STATISTICS"): 7253 self.raise_error("Expecting token STATISTICS") 7254 7255 if self._match_text_seq("NOSCAN"): 7256 this = "NOSCAN" 7257 elif self._match(TokenType.FOR): 7258 if self._match_text_seq("ALL", "COLUMNS"): 7259 this = "FOR ALL COLUMNS" 7260 if self._match_texts("COLUMNS"): 7261 this = "FOR COLUMNS" 7262 expressions = self._parse_csv(self._parse_column_reference) 7263 elif self._match_text_seq("SAMPLE"): 7264 sample = self._parse_number() 7265 expressions = [ 7266 self.expression( 7267 exp.AnalyzeSample, 7268 sample=sample, 7269 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7270 ) 7271 ] 7272 7273 return self.expression( 7274 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7275 ) 7276 7277 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7278 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7279 kind = None 7280 this = None 7281 expression: t.Optional[exp.Expression] = None 7282 if self._match_text_seq("REF", "UPDATE"): 7283 kind = "REF" 7284 this = "UPDATE" 7285 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7286 this = "UPDATE SET DANGLING TO NULL" 7287 elif self._match_text_seq("STRUCTURE"): 7288 kind = "STRUCTURE" 7289 if self._match_text_seq("CASCADE", "FAST"): 7290 this = "CASCADE FAST" 7291 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7292 ("ONLINE", "OFFLINE") 7293 ): 7294 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7295 expression = self._parse_into() 7296 7297 return 
self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7298 7299 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7300 this = self._prev.text.upper() 7301 if self._match_text_seq("COLUMNS"): 7302 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7303 return None 7304 7305 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7306 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7307 if self._match_text_seq("STATISTICS"): 7308 return self.expression(exp.AnalyzeDelete, kind=kind) 7309 return None 7310 7311 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7312 if self._match_text_seq("CHAINED", "ROWS"): 7313 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7314 return None 7315 7316 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7317 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7318 this = self._prev.text.upper() 7319 expression: t.Optional[exp.Expression] = None 7320 expressions = [] 7321 update_options = None 7322 7323 if self._match_text_seq("HISTOGRAM", "ON"): 7324 expressions = self._parse_csv(self._parse_column_reference) 7325 with_expressions = [] 7326 while self._match(TokenType.WITH): 7327 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7328 if self._match_texts(("SYNC", "ASYNC")): 7329 if self._match_text_seq("MODE", advance=False): 7330 with_expressions.append(f"{self._prev.text.upper()} MODE") 7331 self._advance() 7332 else: 7333 buckets = self._parse_number() 7334 if self._match_text_seq("BUCKETS"): 7335 with_expressions.append(f"{buckets} BUCKETS") 7336 if with_expressions: 7337 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7338 7339 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7340 TokenType.UPDATE, advance=False 7341 ): 7342 update_options = self._prev.text.upper() 7343 
self._advance() 7344 elif self._match_text_seq("USING", "DATA"): 7345 expression = self.expression(exp.UsingData, this=self._parse_string()) 7346 7347 return self.expression( 7348 exp.AnalyzeHistogram, 7349 this=this, 7350 expressions=expressions, 7351 expression=expression, 7352 update_options=update_options, 7353 ) 7354 7355 def _parse_merge(self) -> exp.Merge: 7356 self._match(TokenType.INTO) 7357 target = self._parse_table() 7358 7359 if target and self._match(TokenType.ALIAS, advance=False): 7360 target.set("alias", self._parse_table_alias()) 7361 7362 self._match(TokenType.USING) 7363 using = self._parse_table() 7364 7365 self._match(TokenType.ON) 7366 on = self._parse_assignment() 7367 7368 return self.expression( 7369 exp.Merge, 7370 this=target, 7371 using=using, 7372 on=on, 7373 whens=self._parse_when_matched(), 7374 returning=self._parse_returning(), 7375 ) 7376 7377 def _parse_when_matched(self) -> exp.Whens: 7378 whens = [] 7379 7380 while self._match(TokenType.WHEN): 7381 matched = not self._match(TokenType.NOT) 7382 self._match_text_seq("MATCHED") 7383 source = ( 7384 False 7385 if self._match_text_seq("BY", "TARGET") 7386 else self._match_text_seq("BY", "SOURCE") 7387 ) 7388 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7389 7390 self._match(TokenType.THEN) 7391 7392 if self._match(TokenType.INSERT): 7393 this = self._parse_star() 7394 if this: 7395 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7396 else: 7397 then = self.expression( 7398 exp.Insert, 7399 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7400 expression=self._match_text_seq("VALUES") and self._parse_value(), 7401 ) 7402 elif self._match(TokenType.UPDATE): 7403 expressions = self._parse_star() 7404 if expressions: 7405 then = self.expression(exp.Update, expressions=expressions) 7406 else: 7407 then = self.expression( 7408 exp.Update, 7409 expressions=self._match(TokenType.SET) 7410 and 
self._parse_csv(self._parse_equality), 7411 ) 7412 elif self._match(TokenType.DELETE): 7413 then = self.expression(exp.Var, this=self._prev.text) 7414 else: 7415 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7416 7417 whens.append( 7418 self.expression( 7419 exp.When, 7420 matched=matched, 7421 source=source, 7422 condition=condition, 7423 then=then, 7424 ) 7425 ) 7426 return self.expression(exp.Whens, expressions=whens) 7427 7428 def _parse_show(self) -> t.Optional[exp.Expression]: 7429 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7430 if parser: 7431 return parser(self) 7432 return self._parse_as_command(self._prev) 7433 7434 def _parse_set_item_assignment( 7435 self, kind: t.Optional[str] = None 7436 ) -> t.Optional[exp.Expression]: 7437 index = self._index 7438 7439 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7440 return self._parse_set_transaction(global_=kind == "GLOBAL") 7441 7442 left = self._parse_primary() or self._parse_column() 7443 assignment_delimiter = self._match_texts(("=", "TO")) 7444 7445 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7446 self._retreat(index) 7447 return None 7448 7449 right = self._parse_statement() or self._parse_id_var() 7450 if isinstance(right, (exp.Column, exp.Identifier)): 7451 right = exp.var(right.name) 7452 7453 this = self.expression(exp.EQ, this=left, expression=right) 7454 return self.expression(exp.SetItem, this=this, kind=kind) 7455 7456 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7457 self._match_text_seq("TRANSACTION") 7458 characteristics = self._parse_csv( 7459 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7460 ) 7461 return self.expression( 7462 exp.SetItem, 7463 expressions=characteristics, 7464 kind="TRANSACTION", 7465 **{"global": global_}, # type: ignore 7466 ) 7467 7468 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7469 parser = 
    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option keyword described by `options`.

        `options` maps a leading keyword to the keyword sequences that may
        follow it; an empty sequence means the leading keyword is valid alone.
        Returns the matched option as a Var, or None (after rewinding) when
        nothing matched and `raise_unmatched` is False.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        # Remember where we were so we can rewind on a failed match.
        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched. If continuations were required
            # (non-empty) or the leading keyword is unknown (None), this is an
            # error / rollback; an empty sequence accepts the bare keyword.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)
self._match_r_paren() 7537 7538 return self.expression( 7539 exp.DictProperty, 7540 this=this, 7541 kind=kind.this if kind else None, 7542 settings=settings, 7543 ) 7544 7545 def _parse_dict_range(self, this: str) -> exp.DictRange: 7546 self._match_l_paren() 7547 has_min = self._match_text_seq("MIN") 7548 if has_min: 7549 min = self._parse_var() or self._parse_primary() 7550 self._match_text_seq("MAX") 7551 max = self._parse_var() or self._parse_primary() 7552 else: 7553 max = self._parse_var() or self._parse_primary() 7554 min = exp.Literal.number(0) 7555 self._match_r_paren() 7556 return self.expression(exp.DictRange, this=this, min=min, max=max) 7557 7558 def _parse_comprehension( 7559 self, this: t.Optional[exp.Expression] 7560 ) -> t.Optional[exp.Comprehension]: 7561 index = self._index 7562 expression = self._parse_column() 7563 if not self._match(TokenType.IN): 7564 self._retreat(index - 1) 7565 return None 7566 iterator = self._parse_column() 7567 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7568 return self.expression( 7569 exp.Comprehension, 7570 this=this, 7571 expression=expression, 7572 iterator=iterator, 7573 condition=condition, 7574 ) 7575 7576 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7577 if self._match(TokenType.HEREDOC_STRING): 7578 return self.expression(exp.Heredoc, this=self._prev.text) 7579 7580 if not self._match_text_seq("$"): 7581 return None 7582 7583 tags = ["$"] 7584 tag_text = None 7585 7586 if self._is_connected(): 7587 self._advance() 7588 tags.append(self._prev.text.upper()) 7589 else: 7590 self.raise_error("No closing $ found") 7591 7592 if tags[-1] != "$": 7593 if self._is_connected() and self._match_text_seq("$"): 7594 tag_text = tags[-1] 7595 tags.append("$") 7596 else: 7597 self.raise_error("No closing $ found") 7598 7599 heredoc_start = self._curr 7600 7601 while self._curr: 7602 if self._match_text_seq(*tags, advance=False): 7603 this = self._find_sql(heredoc_start, self._prev) 7604 
self._advance(len(tags)) 7605 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7606 7607 self._advance() 7608 7609 self.raise_error(f"No closing {''.join(tags)} found") 7610 return None 7611 7612 def _find_parser( 7613 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7614 ) -> t.Optional[t.Callable]: 7615 if not self._curr: 7616 return None 7617 7618 index = self._index 7619 this = [] 7620 while True: 7621 # The current token might be multiple words 7622 curr = self._curr.text.upper() 7623 key = curr.split(" ") 7624 this.append(curr) 7625 7626 self._advance() 7627 result, trie = in_trie(trie, key) 7628 if result == TrieResult.FAILED: 7629 break 7630 7631 if result == TrieResult.EXISTS: 7632 subparser = parsers[" ".join(this)] 7633 return subparser 7634 7635 self._retreat(index) 7636 return None 7637 7638 def _match(self, token_type, advance=True, expression=None): 7639 if not self._curr: 7640 return None 7641 7642 if self._curr.token_type == token_type: 7643 if advance: 7644 self._advance() 7645 self._add_comments(expression) 7646 return True 7647 7648 return None 7649 7650 def _match_set(self, types, advance=True): 7651 if not self._curr: 7652 return None 7653 7654 if self._curr.token_type in types: 7655 if advance: 7656 self._advance() 7657 return True 7658 7659 return None 7660 7661 def _match_pair(self, token_type_a, token_type_b, advance=True): 7662 if not self._curr or not self._next: 7663 return None 7664 7665 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7666 if advance: 7667 self._advance(2) 7668 return True 7669 7670 return None 7671 7672 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7673 if not self._match(TokenType.L_PAREN, expression=expression): 7674 self.raise_error("Expecting (") 7675 7676 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7677 if not self._match(TokenType.R_PAREN, expression=expression): 7678 
    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite column references in `node` that name lambda parameters.

        `expressions` holds the lambda's parameters; a parameter may carry a
        "to" type, in which case references to it are wrapped in a Cast.
        Returns the (possibly replaced) root of the rewritten tree.
        """
        if not node:
            return node

        # Map parameter name -> cast target, or False when no cast is needed.
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                # Qualified references become a Dot chain; bare references keep
                # the identifier itself.
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Replace the outermost Dot of the enclosing chain.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        # Replacing the root: rebind and return the new node.
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
self._match(TokenType.TABLE) 7755 7756 exists = self._parse_exists(not_=False) 7757 7758 expressions = self._parse_csv( 7759 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7760 ) 7761 7762 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7763 7764 if self._match_text_seq("RESTART", "IDENTITY"): 7765 identity = "RESTART" 7766 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7767 identity = "CONTINUE" 7768 else: 7769 identity = None 7770 7771 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7772 option = self._prev.text 7773 else: 7774 option = None 7775 7776 partition = self._parse_partition() 7777 7778 # Fallback case 7779 if self._curr: 7780 return self._parse_as_command(start) 7781 7782 return self.expression( 7783 exp.TruncateTable, 7784 expressions=expressions, 7785 is_database=is_database, 7786 exists=exists, 7787 cluster=cluster, 7788 identity=identity, 7789 option=option, 7790 partition=partition, 7791 ) 7792 7793 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7794 this = self._parse_ordered(self._parse_opclass) 7795 7796 if not self._match(TokenType.WITH): 7797 return this 7798 7799 op = self._parse_var(any_token=True) 7800 7801 return self.expression(exp.WithOperator, this=this, op=op) 7802 7803 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7804 self._match(TokenType.EQ) 7805 self._match(TokenType.L_PAREN) 7806 7807 opts: t.List[t.Optional[exp.Expression]] = [] 7808 while self._curr and not self._match(TokenType.R_PAREN): 7809 if self._match_text_seq("FORMAT_NAME", "="): 7810 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7811 # so we parse it separately to use _parse_field() 7812 prop = self.expression( 7813 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7814 ) 7815 opts.append(prop) 7816 else: 7817 opts.append(self._parse_property()) 7818 7819 self._match(TokenType.COMMA) 7820 7821 return opts 7822 
    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse the option list of a COPY statement into CopyParameter nodes.

        Options are comma-separated only when the dialect sets
        COPY_PARAMS_ARE_CSV; values may be separated from their option name by
        whitespace, `=` or AS.
        """
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse the credential-related clauses of a COPY statement
        (STORAGE_INTEGRATION, CREDENTIALS, ENCRYPTION, IAM_ROLE, REGION) into a
        single exp.Credentials node. Clauses that are absent leave their slot
        unset.
        """
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        """Parse a file location in a COPY statement; dialects can override
        this to support richer location syntax."""
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse a COPY [INTO] statement.

        Falls back to a raw exp.Command when unconsumed tokens remain after the
        structured clauses have been parsed.
        """
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY ... FROM, False for COPY ... TO
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        """Parse NORMALIZE(<string> [, <form>])."""
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        """Parse CEIL/FLOOR arguments, including an optional trailing
        `TO <unit>` clause."""
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        """Parse `*` together with its optional modifiers.

        Handles `COLUMNS(...)` unpacking (marks the resulting exp.Columns node)
        as well as EXCEPT/EXCLUDE, REPLACE and RENAME star modifiers.
        """
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        """Parse one privilege in a GRANT statement, e.g. `SELECT` or
        `UPDATE (col1, col2)`."""
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        """Parse a grantee (optionally prefixed by ROLE or GROUP); returns
        None when no identifier follows."""
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        """Parse a GRANT statement; falls back to a raw exp.Command when the
        securable or trailing tokens cannot be parsed structurally."""
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        """Parse OVERLAY(<this> PLACING <expr> FROM <pos> [FOR <len>])."""
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap (for a lone `*` argument) or a VarMap from an
    alternating sequence of key/value arguments."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Arguments alternate key, value, key, value, ...; an odd count raises
    # IndexError, matching the pairing contract.
    pairs = [(args[i], args[i + 1]) for i in range(0, len(args), 2)]
    keys = [key for key, _ in pairs]
    values = [value for _, value in pairs]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback for a binary range operator (LIKE, GLOB, ...).

    The callback parses the right-hand side, optionally swaps operand order,
    and wraps the result in an optional ESCAPE clause.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        lhs, rhs = this, self._parse_bitwise()
        if reverse_args:
            lhs, rhs = rhs, lhs
        node = self.expression(expr_type, this=lhs, expression=rhs)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a LOG node, honoring the dialect's argument order and its
    single-argument default (natural log vs. plain LOG)."""
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if not expression:
        # One-argument form: some dialects treat LOG(x) as LN(x)
        cls = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return cls(this=this)

    if not dialect.LOG_BASE_FIRST:
        this, expression = expression, this
    return exp.Log(this=this, expression=expression)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Create a builder for JSON-extraction functions whose second argument is
    converted to a dialect-specific JSON path."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        # Only JSONExtract carries trailing variadic arguments
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a MOD expression, parenthesizing binary operands so precedence is
    preserved, e.g. MOD(a + 1, 7) -> (a + 1) % 7."""

    def _wrap(node: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Parenthesize only binary sub-expressions
        return exp.Paren(this=node) if isinstance(node, exp.Binary) else node

    return exp.Mod(this=_wrap(seq_get(args, 0)), expression=_wrap(seq_get(args, 1)))
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array-style constructor node, recording whether bracket
    notation was used when the dialect distinguishes the two forms."""
    node = exp_class(expressions=args)

    if dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS and exp_class == exp.Array:
        node.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build CONVERT_TIMEZONE; a two-argument call omits the source timezone,
    which may be filled in from *default_source_tz*."""
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
    return exp.ConvertTimezone(
        source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )
175class Parser(metaclass=_Parser): 176 """ 177 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 178 179 Args: 180 error_level: The desired error level. 181 Default: ErrorLevel.IMMEDIATE 182 error_message_context: The amount of context to capture from a query string when displaying 183 the error message (in number of characters). 184 Default: 100 185 max_errors: Maximum number of error messages to include in a raised ParseError. 186 This is only relevant if error_level is ErrorLevel.RAISE. 187 Default: 3 188 """ 189 190 FUNCTIONS: t.Dict[str, t.Callable] = { 191 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 192 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 193 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 194 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 195 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 196 ), 197 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "CHAR": lambda args: exp.Chr(expressions=args), 201 "CHR": lambda args: exp.Chr(expressions=args), 202 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 203 "CONCAT": lambda args, dialect: exp.Concat( 204 expressions=args, 205 safe=not dialect.STRICT_STRING_CONCAT, 206 coalesce=dialect.CONCAT_COALESCE, 207 ), 208 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 209 expressions=args, 210 safe=not dialect.STRICT_STRING_CONCAT, 211 coalesce=dialect.CONCAT_COALESCE, 212 ), 213 "CONVERT_TIMEZONE": build_convert_timezone, 214 "DATE_TO_DATE_STR": lambda args: exp.Cast( 215 this=seq_get(args, 0), 216 to=exp.DataType(this=exp.DataType.Type.TEXT), 217 ), 218 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 219 start=seq_get(args, 0), 220 end=seq_get(args, 1), 221 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 222 ), 223 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 224 "HEX": build_hex, 225 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 226 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 227 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 228 "LIKE": build_like, 229 "LOG": build_logarithm, 230 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 231 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 232 "LOWER": build_lower, 233 "LPAD": lambda args: build_pad(args), 234 "LEFTPAD": lambda args: build_pad(args), 235 "LTRIM": lambda args: build_trim(args), 236 "MOD": build_mod, 237 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 238 "RPAD": lambda args: build_pad(args, is_left=False), 239 "RTRIM": lambda args: build_trim(args, is_left=False), 240 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 241 if len(args) != 2 242 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 243 "STRPOS": exp.StrPosition.from_arg_list, 244 "CHARINDEX": lambda args: build_locate_strposition(args), 245 "INSTR": exp.StrPosition.from_arg_list, 246 "LOCATE": lambda args: build_locate_strposition(args), 247 "TIME_TO_TIME_STR": lambda args: exp.Cast( 248 this=seq_get(args, 0), 249 to=exp.DataType(this=exp.DataType.Type.TEXT), 250 ), 251 "TO_HEX": build_hex, 252 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 253 this=exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 start=exp.Literal.number(1), 258 length=exp.Literal.number(10), 259 ), 260 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 261 "UPPER": build_upper, 262 "VAR_MAP": build_var_map, 263 } 264 265 NO_PAREN_FUNCTIONS = { 266 TokenType.CURRENT_DATE: 
exp.CurrentDate, 267 TokenType.CURRENT_DATETIME: exp.CurrentDate, 268 TokenType.CURRENT_TIME: exp.CurrentTime, 269 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 270 TokenType.CURRENT_USER: exp.CurrentUser, 271 } 272 273 STRUCT_TYPE_TOKENS = { 274 TokenType.NESTED, 275 TokenType.OBJECT, 276 TokenType.STRUCT, 277 TokenType.UNION, 278 } 279 280 NESTED_TYPE_TOKENS = { 281 TokenType.ARRAY, 282 TokenType.LIST, 283 TokenType.LOWCARDINALITY, 284 TokenType.MAP, 285 TokenType.NULLABLE, 286 TokenType.RANGE, 287 *STRUCT_TYPE_TOKENS, 288 } 289 290 ENUM_TYPE_TOKENS = { 291 TokenType.DYNAMIC, 292 TokenType.ENUM, 293 TokenType.ENUM8, 294 TokenType.ENUM16, 295 } 296 297 AGGREGATE_TYPE_TOKENS = { 298 TokenType.AGGREGATEFUNCTION, 299 TokenType.SIMPLEAGGREGATEFUNCTION, 300 } 301 302 TYPE_TOKENS = { 303 TokenType.BIT, 304 TokenType.BOOLEAN, 305 TokenType.TINYINT, 306 TokenType.UTINYINT, 307 TokenType.SMALLINT, 308 TokenType.USMALLINT, 309 TokenType.INT, 310 TokenType.UINT, 311 TokenType.BIGINT, 312 TokenType.UBIGINT, 313 TokenType.INT128, 314 TokenType.UINT128, 315 TokenType.INT256, 316 TokenType.UINT256, 317 TokenType.MEDIUMINT, 318 TokenType.UMEDIUMINT, 319 TokenType.FIXEDSTRING, 320 TokenType.FLOAT, 321 TokenType.DOUBLE, 322 TokenType.CHAR, 323 TokenType.NCHAR, 324 TokenType.VARCHAR, 325 TokenType.NVARCHAR, 326 TokenType.BPCHAR, 327 TokenType.TEXT, 328 TokenType.MEDIUMTEXT, 329 TokenType.LONGTEXT, 330 TokenType.MEDIUMBLOB, 331 TokenType.LONGBLOB, 332 TokenType.BINARY, 333 TokenType.VARBINARY, 334 TokenType.JSON, 335 TokenType.JSONB, 336 TokenType.INTERVAL, 337 TokenType.TINYBLOB, 338 TokenType.TINYTEXT, 339 TokenType.TIME, 340 TokenType.TIMETZ, 341 TokenType.TIMESTAMP, 342 TokenType.TIMESTAMP_S, 343 TokenType.TIMESTAMP_MS, 344 TokenType.TIMESTAMP_NS, 345 TokenType.TIMESTAMPTZ, 346 TokenType.TIMESTAMPLTZ, 347 TokenType.TIMESTAMPNTZ, 348 TokenType.DATETIME, 349 TokenType.DATETIME2, 350 TokenType.DATETIME64, 351 TokenType.SMALLDATETIME, 352 TokenType.DATE, 353 TokenType.DATE32, 
354 TokenType.INT4RANGE, 355 TokenType.INT4MULTIRANGE, 356 TokenType.INT8RANGE, 357 TokenType.INT8MULTIRANGE, 358 TokenType.NUMRANGE, 359 TokenType.NUMMULTIRANGE, 360 TokenType.TSRANGE, 361 TokenType.TSMULTIRANGE, 362 TokenType.TSTZRANGE, 363 TokenType.TSTZMULTIRANGE, 364 TokenType.DATERANGE, 365 TokenType.DATEMULTIRANGE, 366 TokenType.DECIMAL, 367 TokenType.DECIMAL32, 368 TokenType.DECIMAL64, 369 TokenType.DECIMAL128, 370 TokenType.DECIMAL256, 371 TokenType.UDECIMAL, 372 TokenType.BIGDECIMAL, 373 TokenType.UUID, 374 TokenType.GEOGRAPHY, 375 TokenType.GEOMETRY, 376 TokenType.POINT, 377 TokenType.RING, 378 TokenType.LINESTRING, 379 TokenType.MULTILINESTRING, 380 TokenType.POLYGON, 381 TokenType.MULTIPOLYGON, 382 TokenType.HLLSKETCH, 383 TokenType.HSTORE, 384 TokenType.PSEUDO_TYPE, 385 TokenType.SUPER, 386 TokenType.SERIAL, 387 TokenType.SMALLSERIAL, 388 TokenType.BIGSERIAL, 389 TokenType.XML, 390 TokenType.YEAR, 391 TokenType.UNIQUEIDENTIFIER, 392 TokenType.USERDEFINED, 393 TokenType.MONEY, 394 TokenType.SMALLMONEY, 395 TokenType.ROWVERSION, 396 TokenType.IMAGE, 397 TokenType.VARIANT, 398 TokenType.VECTOR, 399 TokenType.OBJECT, 400 TokenType.OBJECT_IDENTIFIER, 401 TokenType.INET, 402 TokenType.IPADDRESS, 403 TokenType.IPPREFIX, 404 TokenType.IPV4, 405 TokenType.IPV6, 406 TokenType.UNKNOWN, 407 TokenType.NULL, 408 TokenType.NAME, 409 TokenType.TDIGEST, 410 TokenType.DYNAMIC, 411 *ENUM_TYPE_TOKENS, 412 *NESTED_TYPE_TOKENS, 413 *AGGREGATE_TYPE_TOKENS, 414 } 415 416 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 417 TokenType.BIGINT: TokenType.UBIGINT, 418 TokenType.INT: TokenType.UINT, 419 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 420 TokenType.SMALLINT: TokenType.USMALLINT, 421 TokenType.TINYINT: TokenType.UTINYINT, 422 TokenType.DECIMAL: TokenType.UDECIMAL, 423 } 424 425 SUBQUERY_PREDICATES = { 426 TokenType.ANY: exp.Any, 427 TokenType.ALL: exp.All, 428 TokenType.EXISTS: exp.Exists, 429 TokenType.SOME: exp.Any, 430 } 431 432 RESERVED_TOKENS = { 433 
*Tokenizer.SINGLE_TOKENS.values(), 434 TokenType.SELECT, 435 } - {TokenType.IDENTIFIER} 436 437 DB_CREATABLES = { 438 TokenType.DATABASE, 439 TokenType.DICTIONARY, 440 TokenType.MODEL, 441 TokenType.NAMESPACE, 442 TokenType.SCHEMA, 443 TokenType.SEQUENCE, 444 TokenType.SINK, 445 TokenType.SOURCE, 446 TokenType.STORAGE_INTEGRATION, 447 TokenType.STREAMLIT, 448 TokenType.TABLE, 449 TokenType.TAG, 450 TokenType.VIEW, 451 TokenType.WAREHOUSE, 452 } 453 454 CREATABLES = { 455 TokenType.COLUMN, 456 TokenType.CONSTRAINT, 457 TokenType.FOREIGN_KEY, 458 TokenType.FUNCTION, 459 TokenType.INDEX, 460 TokenType.PROCEDURE, 461 *DB_CREATABLES, 462 } 463 464 ALTERABLES = { 465 TokenType.INDEX, 466 TokenType.TABLE, 467 TokenType.VIEW, 468 } 469 470 # Tokens that can represent identifiers 471 ID_VAR_TOKENS = { 472 TokenType.ALL, 473 TokenType.ATTACH, 474 TokenType.VAR, 475 TokenType.ANTI, 476 TokenType.APPLY, 477 TokenType.ASC, 478 TokenType.ASOF, 479 TokenType.AUTO_INCREMENT, 480 TokenType.BEGIN, 481 TokenType.BPCHAR, 482 TokenType.CACHE, 483 TokenType.CASE, 484 TokenType.COLLATE, 485 TokenType.COMMAND, 486 TokenType.COMMENT, 487 TokenType.COMMIT, 488 TokenType.CONSTRAINT, 489 TokenType.COPY, 490 TokenType.CUBE, 491 TokenType.DEFAULT, 492 TokenType.DELETE, 493 TokenType.DESC, 494 TokenType.DESCRIBE, 495 TokenType.DETACH, 496 TokenType.DICTIONARY, 497 TokenType.DIV, 498 TokenType.END, 499 TokenType.EXECUTE, 500 TokenType.ESCAPE, 501 TokenType.FALSE, 502 TokenType.FIRST, 503 TokenType.FILTER, 504 TokenType.FINAL, 505 TokenType.FORMAT, 506 TokenType.FULL, 507 TokenType.IDENTIFIER, 508 TokenType.IS, 509 TokenType.ISNULL, 510 TokenType.INTERVAL, 511 TokenType.KEEP, 512 TokenType.KILL, 513 TokenType.LEFT, 514 TokenType.LIMIT, 515 TokenType.LOAD, 516 TokenType.MERGE, 517 TokenType.NATURAL, 518 TokenType.NEXT, 519 TokenType.OFFSET, 520 TokenType.OPERATOR, 521 TokenType.ORDINALITY, 522 TokenType.OVERLAPS, 523 TokenType.OVERWRITE, 524 TokenType.PARTITION, 525 TokenType.PERCENT, 526 
TokenType.PIVOT, 527 TokenType.PRAGMA, 528 TokenType.RANGE, 529 TokenType.RECURSIVE, 530 TokenType.REFERENCES, 531 TokenType.REFRESH, 532 TokenType.RENAME, 533 TokenType.REPLACE, 534 TokenType.RIGHT, 535 TokenType.ROLLUP, 536 TokenType.ROW, 537 TokenType.ROWS, 538 TokenType.SEMI, 539 TokenType.SET, 540 TokenType.SETTINGS, 541 TokenType.SHOW, 542 TokenType.TEMPORARY, 543 TokenType.TOP, 544 TokenType.TRUE, 545 TokenType.TRUNCATE, 546 TokenType.UNIQUE, 547 TokenType.UNNEST, 548 TokenType.UNPIVOT, 549 TokenType.UPDATE, 550 TokenType.USE, 551 TokenType.VOLATILE, 552 TokenType.WINDOW, 553 *CREATABLES, 554 *SUBQUERY_PREDICATES, 555 *TYPE_TOKENS, 556 *NO_PAREN_FUNCTIONS, 557 } 558 ID_VAR_TOKENS.remove(TokenType.UNION) 559 560 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 561 562 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 563 TokenType.ANTI, 564 TokenType.APPLY, 565 TokenType.ASOF, 566 TokenType.FULL, 567 TokenType.LEFT, 568 TokenType.LOCK, 569 TokenType.NATURAL, 570 TokenType.RIGHT, 571 TokenType.SEMI, 572 TokenType.WINDOW, 573 } 574 575 ALIAS_TOKENS = ID_VAR_TOKENS 576 577 ARRAY_CONSTRUCTORS = { 578 "ARRAY": exp.Array, 579 "LIST": exp.List, 580 } 581 582 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 583 584 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 585 586 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 587 588 FUNC_TOKENS = { 589 TokenType.COLLATE, 590 TokenType.COMMAND, 591 TokenType.CURRENT_DATE, 592 TokenType.CURRENT_DATETIME, 593 TokenType.CURRENT_TIMESTAMP, 594 TokenType.CURRENT_TIME, 595 TokenType.CURRENT_USER, 596 TokenType.FILTER, 597 TokenType.FIRST, 598 TokenType.FORMAT, 599 TokenType.GLOB, 600 TokenType.IDENTIFIER, 601 TokenType.INDEX, 602 TokenType.ISNULL, 603 TokenType.ILIKE, 604 TokenType.INSERT, 605 TokenType.LIKE, 606 TokenType.MERGE, 607 TokenType.NEXT, 608 TokenType.OFFSET, 609 TokenType.PRIMARY_KEY, 610 TokenType.RANGE, 611 TokenType.REPLACE, 612 TokenType.RLIKE, 613 TokenType.ROW, 614 TokenType.UNNEST, 615 TokenType.VAR, 
616 TokenType.LEFT, 617 TokenType.RIGHT, 618 TokenType.SEQUENCE, 619 TokenType.DATE, 620 TokenType.DATETIME, 621 TokenType.TABLE, 622 TokenType.TIMESTAMP, 623 TokenType.TIMESTAMPTZ, 624 TokenType.TRUNCATE, 625 TokenType.WINDOW, 626 TokenType.XOR, 627 *TYPE_TOKENS, 628 *SUBQUERY_PREDICATES, 629 } 630 631 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 632 TokenType.AND: exp.And, 633 } 634 635 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 636 TokenType.COLON_EQ: exp.PropertyEQ, 637 } 638 639 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 640 TokenType.OR: exp.Or, 641 } 642 643 EQUALITY = { 644 TokenType.EQ: exp.EQ, 645 TokenType.NEQ: exp.NEQ, 646 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 647 } 648 649 COMPARISON = { 650 TokenType.GT: exp.GT, 651 TokenType.GTE: exp.GTE, 652 TokenType.LT: exp.LT, 653 TokenType.LTE: exp.LTE, 654 } 655 656 BITWISE = { 657 TokenType.AMP: exp.BitwiseAnd, 658 TokenType.CARET: exp.BitwiseXor, 659 TokenType.PIPE: exp.BitwiseOr, 660 } 661 662 TERM = { 663 TokenType.DASH: exp.Sub, 664 TokenType.PLUS: exp.Add, 665 TokenType.MOD: exp.Mod, 666 TokenType.COLLATE: exp.Collate, 667 } 668 669 FACTOR = { 670 TokenType.DIV: exp.IntDiv, 671 TokenType.LR_ARROW: exp.Distance, 672 TokenType.SLASH: exp.Div, 673 TokenType.STAR: exp.Mul, 674 } 675 676 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 677 678 TIMES = { 679 TokenType.TIME, 680 TokenType.TIMETZ, 681 } 682 683 TIMESTAMPS = { 684 TokenType.TIMESTAMP, 685 TokenType.TIMESTAMPTZ, 686 TokenType.TIMESTAMPLTZ, 687 *TIMES, 688 } 689 690 SET_OPERATIONS = { 691 TokenType.UNION, 692 TokenType.INTERSECT, 693 TokenType.EXCEPT, 694 } 695 696 JOIN_METHODS = { 697 TokenType.ASOF, 698 TokenType.NATURAL, 699 TokenType.POSITIONAL, 700 } 701 702 JOIN_SIDES = { 703 TokenType.LEFT, 704 TokenType.RIGHT, 705 TokenType.FULL, 706 } 707 708 JOIN_KINDS = { 709 TokenType.ANTI, 710 TokenType.CROSS, 711 TokenType.INNER, 712 TokenType.OUTER, 713 TokenType.SEMI, 714 TokenType.STRAIGHT_JOIN, 
715 } 716 717 JOIN_HINTS: t.Set[str] = set() 718 719 LAMBDAS = { 720 TokenType.ARROW: lambda self, expressions: self.expression( 721 exp.Lambda, 722 this=self._replace_lambda( 723 self._parse_assignment(), 724 expressions, 725 ), 726 expressions=expressions, 727 ), 728 TokenType.FARROW: lambda self, expressions: self.expression( 729 exp.Kwarg, 730 this=exp.var(expressions[0].name), 731 expression=self._parse_assignment(), 732 ), 733 } 734 735 COLUMN_OPERATORS = { 736 TokenType.DOT: None, 737 TokenType.DCOLON: lambda self, this, to: self.expression( 738 exp.Cast if self.STRICT_CAST else exp.TryCast, 739 this=this, 740 to=to, 741 ), 742 TokenType.ARROW: lambda self, this, path: self.expression( 743 exp.JSONExtract, 744 this=this, 745 expression=self.dialect.to_json_path(path), 746 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 747 ), 748 TokenType.DARROW: lambda self, this, path: self.expression( 749 exp.JSONExtractScalar, 750 this=this, 751 expression=self.dialect.to_json_path(path), 752 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 753 ), 754 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 755 exp.JSONBExtract, 756 this=this, 757 expression=path, 758 ), 759 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 760 exp.JSONBExtractScalar, 761 this=this, 762 expression=path, 763 ), 764 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 765 exp.JSONBContains, 766 this=this, 767 expression=key, 768 ), 769 } 770 771 EXPRESSION_PARSERS = { 772 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 773 exp.Column: lambda self: self._parse_column(), 774 exp.Condition: lambda self: self._parse_assignment(), 775 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 776 exp.Expression: lambda self: self._parse_expression(), 777 exp.From: lambda self: self._parse_from(joins=True), 778 exp.Group: lambda self: self._parse_group(), 779 exp.Having: lambda self: 
self._parse_having(), 780 exp.Hint: lambda self: self._parse_hint_body(), 781 exp.Identifier: lambda self: self._parse_id_var(), 782 exp.Join: lambda self: self._parse_join(), 783 exp.Lambda: lambda self: self._parse_lambda(), 784 exp.Lateral: lambda self: self._parse_lateral(), 785 exp.Limit: lambda self: self._parse_limit(), 786 exp.Offset: lambda self: self._parse_offset(), 787 exp.Order: lambda self: self._parse_order(), 788 exp.Ordered: lambda self: self._parse_ordered(), 789 exp.Properties: lambda self: self._parse_properties(), 790 exp.Qualify: lambda self: self._parse_qualify(), 791 exp.Returning: lambda self: self._parse_returning(), 792 exp.Select: lambda self: self._parse_select(), 793 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 794 exp.Table: lambda self: self._parse_table_parts(), 795 exp.TableAlias: lambda self: self._parse_table_alias(), 796 exp.Tuple: lambda self: self._parse_value(), 797 exp.Whens: lambda self: self._parse_when_matched(), 798 exp.Where: lambda self: self._parse_where(), 799 exp.Window: lambda self: self._parse_named_window(), 800 exp.With: lambda self: self._parse_with(), 801 "JOIN_TYPE": lambda self: self._parse_join_parts(), 802 } 803 804 STATEMENT_PARSERS = { 805 TokenType.ALTER: lambda self: self._parse_alter(), 806 TokenType.ANALYZE: lambda self: self._parse_analyze(), 807 TokenType.BEGIN: lambda self: self._parse_transaction(), 808 TokenType.CACHE: lambda self: self._parse_cache(), 809 TokenType.COMMENT: lambda self: self._parse_comment(), 810 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 811 TokenType.COPY: lambda self: self._parse_copy(), 812 TokenType.CREATE: lambda self: self._parse_create(), 813 TokenType.DELETE: lambda self: self._parse_delete(), 814 TokenType.DESC: lambda self: self._parse_describe(), 815 TokenType.DESCRIBE: lambda self: self._parse_describe(), 816 TokenType.DROP: lambda self: self._parse_drop(), 817 TokenType.GRANT: lambda self: self._parse_grant(), 818 
TokenType.INSERT: lambda self: self._parse_insert(), 819 TokenType.KILL: lambda self: self._parse_kill(), 820 TokenType.LOAD: lambda self: self._parse_load(), 821 TokenType.MERGE: lambda self: self._parse_merge(), 822 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 823 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 824 TokenType.REFRESH: lambda self: self._parse_refresh(), 825 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 826 TokenType.SET: lambda self: self._parse_set(), 827 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 828 TokenType.UNCACHE: lambda self: self._parse_uncache(), 829 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 830 TokenType.UPDATE: lambda self: self._parse_update(), 831 TokenType.USE: lambda self: self.expression( 832 exp.Use, 833 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 834 this=self._parse_table(schema=False), 835 ), 836 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 837 } 838 839 UNARY_PARSERS = { 840 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 841 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 842 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 843 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 844 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 845 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 846 } 847 848 STRING_PARSERS = { 849 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 850 exp.RawString, this=token.text 851 ), 852 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 853 exp.National, this=token.text 854 ), 855 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 856 TokenType.STRING: 
        lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            # The optional UESCAPE clause overrides the default escape character
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    # Token type -> parser for numeric-style literals (numbers plus the
    # bit/byte/hex string variants, which are not quoted character strings).
    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    # Token type -> parser for primary (leaf) expressions: literals, NULL,
    # booleans, session parameters and the star operator.
    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    # Token type -> parser for placeholder / bind-parameter markers (?, @x, :x).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    # Token type -> parser for range/predicate operators (BETWEEN, IN, LIKE,
    # IS, ...). Each callable receives the already-parsed left-hand side.
    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        # a <@ b is equivalent to b @> a, hence reverse_args=True
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # Keyword -> parser for properties that can appear in DDL statements
    # (e.g. ENGINE, LOCATION, PARTITION BY in CREATE TABLE).
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        # DETERMINISTIC is treated as a synonym of IMMUTABLE stability
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Keyword -> parser for column/table constraints in column definitions.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <fn> vs a plain ON <id> property
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # Keyword -> parser for ALTER TABLE actions.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    # Keyword -> parser for ALTER TABLE ... ALTER sub-actions.
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraints that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    # Keyword -> parser for functions invoked without parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    # Token types that can never start a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }
FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1140 1141 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1142 1143 FUNCTION_PARSERS = { 1144 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1145 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1146 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1147 "DECODE": lambda self: self._parse_decode(), 1148 "EXTRACT": lambda self: self._parse_extract(), 1149 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1150 "GAP_FILL": lambda self: self._parse_gap_fill(), 1151 "JSON_OBJECT": lambda self: self._parse_json_object(), 1152 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1153 "JSON_TABLE": lambda self: self._parse_json_table(), 1154 "MATCH": lambda self: self._parse_match_against(), 1155 "NORMALIZE": lambda self: self._parse_normalize(), 1156 "OPENJSON": lambda self: self._parse_open_json(), 1157 "OVERLAY": lambda self: self._parse_overlay(), 1158 "POSITION": lambda self: self._parse_position(), 1159 "PREDICT": lambda self: self._parse_predict(), 1160 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1161 "STRING_AGG": lambda self: self._parse_string_agg(), 1162 "SUBSTRING": lambda self: self._parse_substring(), 1163 "TRIM": lambda self: self._parse_trim(), 1164 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1165 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1166 "XMLELEMENT": lambda self: self.expression( 1167 exp.XMLElement, 1168 this=self._match_text_seq("NAME") and self._parse_id_var(), 1169 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1170 ), 1171 "XMLTABLE": lambda self: self._parse_xml_table(), 1172 } 1173 1174 QUERY_MODIFIER_PARSERS = { 1175 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1176 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1177 TokenType.WHERE: lambda self: ("where", 
self._parse_where()), 1178 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1179 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1180 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1181 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1182 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1183 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1184 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1185 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1186 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1187 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1188 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1189 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1190 TokenType.CLUSTER_BY: lambda self: ( 1191 "cluster", 1192 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1193 ), 1194 TokenType.DISTRIBUTE_BY: lambda self: ( 1195 "distribute", 1196 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1197 ), 1198 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1199 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1200 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1201 } 1202 1203 SET_PARSERS = { 1204 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1205 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1206 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1207 "TRANSACTION": lambda self: self._parse_set_transaction(), 1208 } 1209 1210 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1211 1212 TYPE_LITERAL_PARSERS = { 1213 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1214 } 1215 1216 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
t.Callable[[exp.DataType], exp.DataType]] = {} 1217 1218 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1219 1220 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1221 1222 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1223 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1224 "ISOLATION": ( 1225 ("LEVEL", "REPEATABLE", "READ"), 1226 ("LEVEL", "READ", "COMMITTED"), 1227 ("LEVEL", "READ", "UNCOMITTED"), 1228 ("LEVEL", "SERIALIZABLE"), 1229 ), 1230 "READ": ("WRITE", "ONLY"), 1231 } 1232 1233 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1234 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1235 ) 1236 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1237 1238 CREATE_SEQUENCE: OPTIONS_TYPE = { 1239 "SCALE": ("EXTEND", "NOEXTEND"), 1240 "SHARD": ("EXTEND", "NOEXTEND"), 1241 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1242 **dict.fromkeys( 1243 ( 1244 "SESSION", 1245 "GLOBAL", 1246 "KEEP", 1247 "NOKEEP", 1248 "ORDER", 1249 "NOORDER", 1250 "NOCACHE", 1251 "CYCLE", 1252 "NOCYCLE", 1253 "NOMINVALUE", 1254 "NOMAXVALUE", 1255 "NOSCALE", 1256 "NOSHARD", 1257 ), 1258 tuple(), 1259 ), 1260 } 1261 1262 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1263 1264 USABLES: OPTIONS_TYPE = dict.fromkeys( 1265 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1266 ) 1267 1268 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1269 1270 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1271 "TYPE": ("EVOLUTION",), 1272 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1273 } 1274 1275 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1276 1277 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1278 1279 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1280 "NOT": ("ENFORCED",), 1281 "MATCH": ( 1282 "FULL", 1283 "PARTIAL", 1284 "SIMPLE", 1285 ), 1286 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1287 
**dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1288 } 1289 1290 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1291 1292 CLONE_KEYWORDS = {"CLONE", "COPY"} 1293 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1294 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1295 1296 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1297 1298 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1299 1300 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1301 1302 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1303 1304 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1305 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1306 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1307 1308 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1309 1310 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1311 1312 ADD_CONSTRAINT_TOKENS = { 1313 TokenType.CONSTRAINT, 1314 TokenType.FOREIGN_KEY, 1315 TokenType.INDEX, 1316 TokenType.KEY, 1317 TokenType.PRIMARY_KEY, 1318 TokenType.UNIQUE, 1319 } 1320 1321 DISTINCT_TOKENS = {TokenType.DISTINCT} 1322 1323 NULL_TOKENS = {TokenType.NULL} 1324 1325 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1326 1327 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1328 1329 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1330 1331 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1332 1333 ODBC_DATETIME_LITERALS = { 1334 "d": exp.Date, 1335 "t": exp.Time, 1336 "ts": exp.Timestamp, 1337 } 1338 1339 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1340 1341 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1342 1343 # The style options for the DESCRIBE statement 1344 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1345 1346 # The 
style options for the ANALYZE statement 1347 ANALYZE_STYLES = { 1348 "BUFFER_USAGE_LIMIT", 1349 "FULL", 1350 "LOCAL", 1351 "NO_WRITE_TO_BINLOG", 1352 "SAMPLE", 1353 "SKIP_LOCKED", 1354 "VERBOSE", 1355 } 1356 1357 ANALYZE_EXPRESSION_PARSERS = { 1358 "ALL": lambda self: self._parse_analyze_columns(), 1359 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1360 "DELETE": lambda self: self._parse_analyze_delete(), 1361 "DROP": lambda self: self._parse_analyze_histogram(), 1362 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1363 "LIST": lambda self: self._parse_analyze_list(), 1364 "PREDICATE": lambda self: self._parse_analyze_columns(), 1365 "UPDATE": lambda self: self._parse_analyze_histogram(), 1366 "VALIDATE": lambda self: self._parse_analyze_validate(), 1367 } 1368 1369 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1370 1371 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1372 1373 OPERATION_MODIFIERS: t.Set[str] = set() 1374 1375 STRICT_CAST = True 1376 1377 PREFIXED_PIVOT_COLUMNS = False 1378 IDENTIFY_PIVOT_STRINGS = False 1379 1380 LOG_DEFAULTS_TO_LN = False 1381 1382 # Whether ADD is present for each column added by ALTER TABLE 1383 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1384 1385 # Whether the table sample clause expects CSV syntax 1386 TABLESAMPLE_CSV = False 1387 1388 # The default method used for table sampling 1389 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1390 1391 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1392 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1393 1394 # Whether the TRIM function expects the characters to trim as its first argument 1395 TRIM_PATTERN_FIRST = False 1396 1397 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1398 STRING_ALIASES = False 1399 1400 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1401 MODIFIERS_ATTACHED_TO_SET_OP = True 1402 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1403 1404 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1405 NO_PAREN_IF_COMMANDS = True 1406 1407 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1408 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1409 1410 # Whether the `:` operator is used to extract a value from a VARIANT column 1411 COLON_IS_VARIANT_EXTRACT = False 1412 1413 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1414 # If this is True and '(' is not found, the keyword will be treated as an identifier 1415 VALUES_FOLLOWED_BY_PAREN = True 1416 1417 # Whether implicit unnesting is supported, e.g. 
    # SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Restrict instances to exactly this state (no per-instance __dict__).
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: Error-handling policy; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of SQL context shown in error messages.
            max_errors: Maximum number of error messages concatenated into a ParseError.
            dialect: The dialect (name, class or instance) to resolve and parse with.
        """
        # Imported locally, presumably to avoid a circular import between the
        # parser and dialects modules -- confirm before moving to module level.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self) -> None:
        """Clears all parsing state so this instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this failure belongs to, then try the next one
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate type failed; surface all collected errors at once
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and applies `parse_method` to each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # Keep a commented semicolon as its own chunk so its comments survive
                if token.comments:
                    chunks.append([token])

                # Start a fresh chunk unless this was the trailing semicolon
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Anything left over means the statement didn't parse to completion
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The escape codes underline the offending span in terminals
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach explicit comments if given; otherwise pick up pending token comments
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfers any comments buffered from the previous token onto `expression`
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # The slice of the original SQL spanned by the two tokens, inclusive
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent in the input.
        # NOTE(review): due to `and` chaining this returns a falsy non-bool
        # (None) when there is no previous/current token -- callers only use
        # it in boolean context, so this is harmless.
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor forward (or backward, for negative deltas via
        # _retreat) and refreshes the _curr/_next/_prev token views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or fast-forwards) the cursor to an absolute index
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        # Single-token statements are not worth warning about
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """Fallback parser: wraps unsupported syntax in an opaque Command node."""
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force immediate raising so failures surface as ParseError here
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            # Rewind on failure, or unconditionally when the caller asked to
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT ON <kind> <object> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind -- fall back to an opaque Command
            return self._parse_as_command(start)

        # The commented-on object is parsed according to its kind
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL expression optionally followed by an action keyword
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # GROUP BY ... SET <aggregations> is only valid after a group clause
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Entry point for a single statement: dispatch, command fallback, or bare expression."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        # No statement keyword: parse as an expression, then as a SELECT
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to Command for unknown targets."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        # Optional parenthesized type list, e.g. DROP FUNCTION f(INT, TEXT)
        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only on a full match
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched
a statement parser 1853 start = self._prev 1854 1855 replace = ( 1856 start.token_type == TokenType.REPLACE 1857 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1858 or self._match_pair(TokenType.OR, TokenType.ALTER) 1859 ) 1860 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1861 1862 unique = self._match(TokenType.UNIQUE) 1863 1864 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1865 clustered = True 1866 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1867 "COLUMNSTORE" 1868 ): 1869 clustered = False 1870 else: 1871 clustered = None 1872 1873 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1874 self._advance() 1875 1876 properties = None 1877 create_token = self._match_set(self.CREATABLES) and self._prev 1878 1879 if not create_token: 1880 # exp.Properties.Location.POST_CREATE 1881 properties = self._parse_properties() 1882 create_token = self._match_set(self.CREATABLES) and self._prev 1883 1884 if not properties or not create_token: 1885 return self._parse_as_command(start) 1886 1887 concurrently = self._match_text_seq("CONCURRENTLY") 1888 exists = self._parse_exists(not_=True) 1889 this = None 1890 expression: t.Optional[exp.Expression] = None 1891 indexes = None 1892 no_schema_binding = None 1893 begin = None 1894 end = None 1895 clone = None 1896 1897 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1898 nonlocal properties 1899 if properties and temp_props: 1900 properties.expressions.extend(temp_props.expressions) 1901 elif temp_props: 1902 properties = temp_props 1903 1904 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1905 this = self._parse_user_defined_function(kind=create_token.token_type) 1906 1907 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1908 extend_props(self._parse_properties()) 1909 1910 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1911 
extend_props(self._parse_properties()) 1912 1913 if not expression: 1914 if self._match(TokenType.COMMAND): 1915 expression = self._parse_as_command(self._prev) 1916 else: 1917 begin = self._match(TokenType.BEGIN) 1918 return_ = self._match_text_seq("RETURN") 1919 1920 if self._match(TokenType.STRING, advance=False): 1921 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1922 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1923 expression = self._parse_string() 1924 extend_props(self._parse_properties()) 1925 else: 1926 expression = self._parse_user_defined_function_expression() 1927 1928 end = self._match_text_seq("END") 1929 1930 if return_: 1931 expression = self.expression(exp.Return, this=expression) 1932 elif create_token.token_type == TokenType.INDEX: 1933 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1934 if not self._match(TokenType.ON): 1935 index = self._parse_id_var() 1936 anonymous = False 1937 else: 1938 index = None 1939 anonymous = True 1940 1941 this = self._parse_index(index=index, anonymous=anonymous) 1942 elif create_token.token_type in self.DB_CREATABLES: 1943 table_parts = self._parse_table_parts( 1944 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1945 ) 1946 1947 # exp.Properties.Location.POST_NAME 1948 self._match(TokenType.COMMA) 1949 extend_props(self._parse_properties(before=True)) 1950 1951 this = self._parse_schema(this=table_parts) 1952 1953 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1954 extend_props(self._parse_properties()) 1955 1956 self._match(TokenType.ALIAS) 1957 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1958 # exp.Properties.Location.POST_ALIAS 1959 extend_props(self._parse_properties()) 1960 1961 if create_token.token_type == TokenType.SEQUENCE: 1962 expression = self._parse_types() 1963 extend_props(self._parse_properties()) 1964 else: 
1965 expression = self._parse_ddl_select() 1966 1967 if create_token.token_type == TokenType.TABLE: 1968 # exp.Properties.Location.POST_EXPRESSION 1969 extend_props(self._parse_properties()) 1970 1971 indexes = [] 1972 while True: 1973 index = self._parse_index() 1974 1975 # exp.Properties.Location.POST_INDEX 1976 extend_props(self._parse_properties()) 1977 if not index: 1978 break 1979 else: 1980 self._match(TokenType.COMMA) 1981 indexes.append(index) 1982 elif create_token.token_type == TokenType.VIEW: 1983 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1984 no_schema_binding = True 1985 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1986 extend_props(self._parse_properties()) 1987 1988 shallow = self._match_text_seq("SHALLOW") 1989 1990 if self._match_texts(self.CLONE_KEYWORDS): 1991 copy = self._prev.text.lower() == "copy" 1992 clone = self.expression( 1993 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1994 ) 1995 1996 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1997 return self._parse_as_command(start) 1998 1999 create_kind_text = create_token.text.upper() 2000 return self.expression( 2001 exp.Create, 2002 this=this, 2003 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2004 replace=replace, 2005 refresh=refresh, 2006 unique=unique, 2007 expression=expression, 2008 exists=exists, 2009 properties=properties, 2010 indexes=indexes, 2011 no_schema_binding=no_schema_binding, 2012 begin=begin, 2013 end=end, 2014 clone=clone, 2015 concurrently=concurrently, 2016 clustered=clustered, 2017 ) 2018 2019 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2020 seq = exp.SequenceProperties() 2021 2022 options = [] 2023 index = self._index 2024 2025 while self._curr: 2026 self._match(TokenType.COMMA) 2027 if self._match_text_seq("INCREMENT"): 2028 self._match_text_seq("BY") 2029 self._match_text_seq("=") 
2030 seq.set("increment", self._parse_term()) 2031 elif self._match_text_seq("MINVALUE"): 2032 seq.set("minvalue", self._parse_term()) 2033 elif self._match_text_seq("MAXVALUE"): 2034 seq.set("maxvalue", self._parse_term()) 2035 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2036 self._match_text_seq("=") 2037 seq.set("start", self._parse_term()) 2038 elif self._match_text_seq("CACHE"): 2039 # T-SQL allows empty CACHE which is initialized dynamically 2040 seq.set("cache", self._parse_number() or True) 2041 elif self._match_text_seq("OWNED", "BY"): 2042 # "OWNED BY NONE" is the default 2043 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2044 else: 2045 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2046 if opt: 2047 options.append(opt) 2048 else: 2049 break 2050 2051 seq.set("options", options if options else None) 2052 return None if self._index == index else seq 2053 2054 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2055 # only used for teradata currently 2056 self._match(TokenType.COMMA) 2057 2058 kwargs = { 2059 "no": self._match_text_seq("NO"), 2060 "dual": self._match_text_seq("DUAL"), 2061 "before": self._match_text_seq("BEFORE"), 2062 "default": self._match_text_seq("DEFAULT"), 2063 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2064 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2065 "after": self._match_text_seq("AFTER"), 2066 "minimum": self._match_texts(("MIN", "MINIMUM")), 2067 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2068 } 2069 2070 if self._match_texts(self.PROPERTY_PARSERS): 2071 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2072 try: 2073 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2074 except TypeError: 2075 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2076 2077 return None 2078 2079 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2080 return 
self._parse_wrapped_csv(self._parse_property) 2081 2082 def _parse_property(self) -> t.Optional[exp.Expression]: 2083 if self._match_texts(self.PROPERTY_PARSERS): 2084 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2085 2086 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2087 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2088 2089 if self._match_text_seq("COMPOUND", "SORTKEY"): 2090 return self._parse_sortkey(compound=True) 2091 2092 if self._match_text_seq("SQL", "SECURITY"): 2093 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2094 2095 index = self._index 2096 key = self._parse_column() 2097 2098 if not self._match(TokenType.EQ): 2099 self._retreat(index) 2100 return self._parse_sequence_properties() 2101 2102 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2103 if isinstance(key, exp.Column): 2104 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2105 2106 value = self._parse_bitwise() or self._parse_var(any_token=True) 2107 2108 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2109 if isinstance(value, exp.Column): 2110 value = exp.var(value.name) 2111 2112 return self.expression(exp.Property, this=key, value=value) 2113 2114 def _parse_stored(self) -> exp.FileFormatProperty: 2115 self._match(TokenType.ALIAS) 2116 2117 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2118 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2119 2120 return self.expression( 2121 exp.FileFormatProperty, 2122 this=( 2123 self.expression( 2124 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2125 ) 2126 if input_format or output_format 2127 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2128 ), 2129 ) 2130 2131 def 
_parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2132 field = self._parse_field() 2133 if isinstance(field, exp.Identifier) and not field.quoted: 2134 field = exp.var(field) 2135 2136 return field 2137 2138 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2139 self._match(TokenType.EQ) 2140 self._match(TokenType.ALIAS) 2141 2142 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2143 2144 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2145 properties = [] 2146 while True: 2147 if before: 2148 prop = self._parse_property_before() 2149 else: 2150 prop = self._parse_property() 2151 if not prop: 2152 break 2153 for p in ensure_list(prop): 2154 properties.append(p) 2155 2156 if properties: 2157 return self.expression(exp.Properties, expressions=properties) 2158 2159 return None 2160 2161 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2162 return self.expression( 2163 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2164 ) 2165 2166 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2167 if self._match_texts(("DEFINER", "INVOKER")): 2168 security_specifier = self._prev.text.upper() 2169 return self.expression(exp.SecurityProperty, this=security_specifier) 2170 return None 2171 2172 def _parse_settings_property(self) -> exp.SettingsProperty: 2173 return self.expression( 2174 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2175 ) 2176 2177 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2178 if self._index >= 2: 2179 pre_volatile_token = self._tokens[self._index - 2] 2180 else: 2181 pre_volatile_token = None 2182 2183 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2184 return exp.VolatileProperty() 2185 2186 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2187 2188 
    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        # T-SQL SYSTEM_VERSIONING = ON (...) | OFF; defaults to "on".
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            # Optional sub-options inside parentheses.
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        # DATA_DELETION = ON|OFF (FILTER_COLUMN = ..., RETENTION_PERIOD = ...)
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        # DISTRIBUTED BY HASH (cols) | BY RANDOM [BUCKETS <n> | AUTO] [ORDER BY ...]
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        # [DUPLICATE|AGGREGATE|...] KEY (col, ...)
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        # Dispatches the many WITH <...> property forms.
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        # EXECUTE AS <option|'string'> or one of PROCEDURE_OPTIONS.
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        # DEFINER = user@host
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        # WITH JOURNAL [TABLE] [=] <table>
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        # [NO] LOG
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        # Journal property; modifiers are forwarded by _parse_property_before.
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM [=] ON | OFF | DEFAULT
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        # CLUSTER BY <ordered exprs>, optionally parenthesized.
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (cols) [SORTED BY (ordered cols)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY GRANTS; rewinds the COPY token if GRANTS doesn't follow.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        # FREESPACE [=] <n> [PERCENT]
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # MERGEBLOCKRATIO [= <n> [PERCENT]] | NO/DEFAULT MERGEBLOCKRATIO
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        # DATABLOCKSIZE [=] <n> [BYTES|KBYTES|KILOBYTES]
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # BLOCKCOMPRESSION [=] ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP (...)]
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR <target>]
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            # Not this property after all; rewind the NO/CONCURRENT tokens.
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # LOCKING <kind> [<object>] FOR|IN <lock type> [OVERRIDE]
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        # PARTITION BY <expr, ...>; empty list when absent.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        # Postgres partition bounds: IN (...), FROM (...) TO (...), WITH (MODULUS m, REMAINDER r)
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        # PARTITION OF <parent> {DEFAULT | FOR VALUES <bound spec>}
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        # PARTITIONED BY [=] (<schema>) or a bracketed field expression.
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # CONTAINS SQL
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # MODIFIES SQL DATA
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        # NO PRIMARY INDEX | NO SQL
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        # ON COMMIT PRESERVE|DELETE ROWS, or a generic ON <schema> property.
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # READS SQL DATA
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        # DISTKEY (<col>)
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        # LIKE <table> [INCLUDING|EXCLUDING <option> ...]
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        # [COMPOUND] SORTKEY (<cols>)
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        # [DEFAULT] CHARACTER SET [=] <charset>
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        # REMOTE WITH CONNECTION <connection>
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # RETURNS TABLE<...> | TABLE (...) | NULL ON NULL INPUT | <type>
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        # DESCRIBE [<kind>] [<style>] [FORMAT ...] <statement|table> [properties] [partition]
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The "style" token was actually the first part of a dotted table name.
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        # Oracle-style INSERT FIRST|ALL [WHEN ... THEN] INTO ... SELECT ...
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # One [WHEN <cond> THEN | ELSE] INTO <table> [VALUES ...] branch.
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        # INSERT in its many dialect flavors (DIRECTORY, OVERWRITE, multitable, ...).
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                # e.g. SQLite INSERT OR REPLACE/IGNORE/...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        # KILL [CONNECTION|QUERY] <id>
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        # ON CONFLICT [(keys) | ON CONSTRAINT <name>] <action> or ON DUPLICATE KEY <action>.
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        # RETURNING <exprs> [INTO <target>]
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # ROW FORMAT ... (ROW was already consumed by the caller).
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        # [WITH] SERDEPROPERTIES (k = v, ...)
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # ROW FORMAT SERDE '<class>' [SERDEPROPERTIES] | ROW FORMAT DELIMITED <options>
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive's ``LOAD DATA [LOCAL] INPATH ... INTO TABLE ...``.

        Anything after LOAD that is not the DATA form falls back to an opaque
        exp.Command so the statement round-trips unchanged.
        """
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            # NOTE: keyword-argument order is load-bearing here — each value
            # expression consumes tokens, so the clauses must be evaluated in
            # exactly this sequence (table, partition, INPUTFORMAT, SERDE).
            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement.

        RETURNING is attempted twice — once right after SET and again after
        the trailing clauses — because dialects disagree on where the clause
        may appear; whichever attempt matched first wins.
        """
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        # Dict form is needed because "from" is a Python keyword; values are
        # evaluated in order, which matches clause order in the token stream.
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query.

        This is the central query dispatcher: it handles WITH-prefixed
        statements, plain SELECTs, parenthesized subqueries/pivots, bare
        VALUES, DuckDB's FROM-first and SUMMARIZE syntax, DESCRIBE, and
        STREAM(...). Returns None when no query form matches.

        Args:
            nested: allow a parenthesized nested select in table context.
            table: parse in table position (affects parenthesized handling).
            parse_subquery_alias: parse an alias on a returned subquery.
            parse_set_operation: fold trailing UNION/INTERSECT/EXCEPT in.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # A following DOT means SELECT is being used as an identifier
            # (e.g. a column named "select"), so ALL/DISTINCT don't apply.
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # BigQuery SELECT AS STRUCT / AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            # TOP-style limits appear before the projections.
            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
                this = self._parse_simplified_pivot(
                    is_unpivot=self._prev.token_type == TokenType.UNPIVOT
                )
            elif self._match(TokenType.FROM):
                from_ = self._parse_from(skip_from_token=True)
                # Support parentheses for duckdb FROM-first syntax
                select = self._parse_select()
                if select:
                    select.set("from", from_)
                    this = select
                else:
                    this = exp.select("*").from_(t.cast(exp.From, from_))
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following
                if table and isinstance(this, exp.Values) and this.alias:
                    alias = this.args["alias"].pop()
                    this = exp.Table(this=this, alias=alias)

                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            # Bare "FROM x" (duckdb): desugar to SELECT * FROM x.
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                # STREAM was an identifier, not a keyword — rewind.
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this
    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse a single CTE: ``alias [(cols)] AS [NOT MATERIALIZED] (stmt)``.

        Returns None (after rewinding to the saved index) when the alias is
        not followed by AS and the dialect does not allow omitting it.
        """
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        # Tri-state: False = NOT MATERIALIZED, True = MATERIALIZED,
        # None = neither keyword present.
        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        # A bare VALUES body is normalized to SELECT * FROM (VALUES ...) _values.
        if isinstance(cte.this, exp.Values):
            cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True)))

        return cte
columns=columns) 3253 3254 # We bubble up comments from the Identifier to the TableAlias 3255 if isinstance(alias, exp.Identifier): 3256 table_alias.add_comments(alias.pop_comments()) 3257 3258 return table_alias 3259 3260 def _parse_subquery( 3261 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3262 ) -> t.Optional[exp.Subquery]: 3263 if not this: 3264 return None 3265 3266 return self.expression( 3267 exp.Subquery, 3268 this=this, 3269 pivots=self._parse_pivots(), 3270 alias=self._parse_table_alias() if parse_alias else None, 3271 sample=self._parse_table_sample(), 3272 ) 3273 3274 def _implicit_unnests_to_explicit(self, this: E) -> E: 3275 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3276 3277 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3278 for i, join in enumerate(this.args.get("joins") or []): 3279 table = join.this 3280 normalized_table = table.copy() 3281 normalized_table.meta["maybe_column"] = True 3282 normalized_table = _norm(normalized_table, dialect=self.dialect) 3283 3284 if isinstance(table, exp.Table) and not join.args.get("on"): 3285 if normalized_table.parts[0].name in refs: 3286 table_as_column = table.to_column() 3287 unnest = exp.Unnest(expressions=[table_as_column]) 3288 3289 # Table.to_column creates a parent Alias node that we want to convert to 3290 # a TableAlias and attach to the Unnest, so it matches the parser's output 3291 if isinstance(table.args.get("alias"), exp.TableAlias): 3292 table_as_column.replace(table_as_column.this) 3293 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3294 3295 table.replace(unnest) 3296 3297 refs.add(normalized_table.alias_or_name) 3298 3299 return this 3300 3301 def _parse_query_modifiers( 3302 self, this: t.Optional[exp.Expression] 3303 ) -> t.Optional[exp.Expression]: 3304 if isinstance(this, (exp.Query, exp.Table)): 3305 for join in self._parse_joins(): 3306 this.append("joins", join) 
    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        """Parse hint-comment contents into an exp.Hint.

        Hints are consumed as repeated CSV groups of function calls / vars.
        If parsing raises or leaves tokens unconsumed, the parser rewinds and
        keeps the raw hint text verbatim so no information is lost.
        """
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            # iter(callable, []) keeps pulling CSV groups until an empty
            # group (the sentinel) is returned.
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        # Leftover tokens mean the structured parse was incomplete — fall
        # back to preserving the raw string in that case too.
        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)
dialect=self.dialect) 3371 3372 return None 3373 3374 def _parse_into(self) -> t.Optional[exp.Into]: 3375 if not self._match(TokenType.INTO): 3376 return None 3377 3378 temp = self._match(TokenType.TEMPORARY) 3379 unlogged = self._match_text_seq("UNLOGGED") 3380 self._match(TokenType.TABLE) 3381 3382 return self.expression( 3383 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3384 ) 3385 3386 def _parse_from( 3387 self, joins: bool = False, skip_from_token: bool = False 3388 ) -> t.Optional[exp.From]: 3389 if not skip_from_token and not self._match(TokenType.FROM): 3390 return None 3391 3392 return self.expression( 3393 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3394 ) 3395 3396 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3397 return self.expression( 3398 exp.MatchRecognizeMeasure, 3399 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3400 this=self._parse_expression(), 3401 ) 3402 3403 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3404 if not self._match(TokenType.MATCH_RECOGNIZE): 3405 return None 3406 3407 self._match_l_paren() 3408 3409 partition = self._parse_partition_by() 3410 order = self._parse_order() 3411 3412 measures = ( 3413 self._parse_csv(self._parse_match_recognize_measure) 3414 if self._match_text_seq("MEASURES") 3415 else None 3416 ) 3417 3418 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3419 rows = exp.var("ONE ROW PER MATCH") 3420 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3421 text = "ALL ROWS PER MATCH" 3422 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3423 text += " SHOW EMPTY MATCHES" 3424 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3425 text += " OMIT EMPTY MATCHES" 3426 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3427 text += " WITH UNMATCHED ROWS" 3428 rows = exp.var(text) 3429 else: 3430 rows = None 3431 3432 if 
self._match_text_seq("AFTER", "MATCH", "SKIP"): 3433 text = "AFTER MATCH SKIP" 3434 if self._match_text_seq("PAST", "LAST", "ROW"): 3435 text += " PAST LAST ROW" 3436 elif self._match_text_seq("TO", "NEXT", "ROW"): 3437 text += " TO NEXT ROW" 3438 elif self._match_text_seq("TO", "FIRST"): 3439 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3440 elif self._match_text_seq("TO", "LAST"): 3441 text += f" TO LAST {self._advance_any().text}" # type: ignore 3442 after = exp.var(text) 3443 else: 3444 after = None 3445 3446 if self._match_text_seq("PATTERN"): 3447 self._match_l_paren() 3448 3449 if not self._curr: 3450 self.raise_error("Expecting )", self._curr) 3451 3452 paren = 1 3453 start = self._curr 3454 3455 while self._curr and paren > 0: 3456 if self._curr.token_type == TokenType.L_PAREN: 3457 paren += 1 3458 if self._curr.token_type == TokenType.R_PAREN: 3459 paren -= 1 3460 3461 end = self._prev 3462 self._advance() 3463 3464 if paren > 0: 3465 self.raise_error("Expecting )", self._curr) 3466 3467 pattern = exp.var(self._find_sql(start, end)) 3468 else: 3469 pattern = None 3470 3471 define = ( 3472 self._parse_csv(self._parse_name_as_expression) 3473 if self._match_text_seq("DEFINE") 3474 else None 3475 ) 3476 3477 self._match_r_paren() 3478 3479 return self.expression( 3480 exp.MatchRecognize, 3481 partition_by=partition, 3482 order=order, 3483 measures=measures, 3484 rows=rows, 3485 after=after, 3486 pattern=pattern, 3487 define=define, 3488 alias=self._parse_table_alias(), 3489 ) 3490 3491 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3492 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3493 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3494 cross_apply = False 3495 3496 if cross_apply is not None: 3497 this = self._parse_select(table=True) 3498 view = None 3499 outer = None 3500 elif self._match(TokenType.LATERAL): 3501 this = self._parse_select(table=True) 3502 view = 
self._match(TokenType.VIEW) 3503 outer = self._match(TokenType.OUTER) 3504 else: 3505 return None 3506 3507 if not this: 3508 this = ( 3509 self._parse_unnest() 3510 or self._parse_function() 3511 or self._parse_id_var(any_token=False) 3512 ) 3513 3514 while self._match(TokenType.DOT): 3515 this = exp.Dot( 3516 this=this, 3517 expression=self._parse_function() or self._parse_id_var(any_token=False), 3518 ) 3519 3520 if view: 3521 table = self._parse_id_var(any_token=False) 3522 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3523 table_alias: t.Optional[exp.TableAlias] = self.expression( 3524 exp.TableAlias, this=table, columns=columns 3525 ) 3526 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3527 # We move the alias from the lateral's child node to the lateral itself 3528 table_alias = this.args["alias"].pop() 3529 else: 3530 table_alias = self._parse_table_alias() 3531 3532 return self.expression( 3533 exp.Lateral, 3534 this=this, 3535 view=view, 3536 outer=outer, 3537 alias=table_alias, 3538 cross_apply=cross_apply, 3539 ) 3540 3541 def _parse_join_parts( 3542 self, 3543 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3544 return ( 3545 self._match_set(self.JOIN_METHODS) and self._prev, 3546 self._match_set(self.JOIN_SIDES) and self._prev, 3547 self._match_set(self.JOIN_KINDS) and self._prev, 3548 ) 3549 3550 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3551 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3552 this = self._parse_column() 3553 if isinstance(this, exp.Column): 3554 return this.this 3555 return this 3556 3557 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3558 3559 def _parse_join( 3560 self, skip_join_token: bool = False, parse_bracket: bool = False 3561 ) -> t.Optional[exp.Join]: 3562 if self._match(TokenType.COMMA): 3563 return self.expression(exp.Join, this=self._parse_table()) 3564 3565 index = 
self._index 3566 method, side, kind = self._parse_join_parts() 3567 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3568 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3569 3570 if not skip_join_token and not join: 3571 self._retreat(index) 3572 kind = None 3573 method = None 3574 side = None 3575 3576 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3577 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3578 3579 if not skip_join_token and not join and not outer_apply and not cross_apply: 3580 return None 3581 3582 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3583 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3584 kwargs["expressions"] = self._parse_csv( 3585 lambda: self._parse_table(parse_bracket=parse_bracket) 3586 ) 3587 3588 if method: 3589 kwargs["method"] = method.text 3590 if side: 3591 kwargs["side"] = side.text 3592 if kind: 3593 kwargs["kind"] = kind.text 3594 if hint: 3595 kwargs["hint"] = hint 3596 3597 if self._match(TokenType.MATCH_CONDITION): 3598 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3599 3600 if self._match(TokenType.ON): 3601 kwargs["on"] = self._parse_assignment() 3602 elif self._match(TokenType.USING): 3603 kwargs["using"] = self._parse_using_identifiers() 3604 elif ( 3605 not (outer_apply or cross_apply) 3606 and not isinstance(kwargs["this"], exp.Unnest) 3607 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3608 ): 3609 index = self._index 3610 joins: t.Optional[list] = list(self._parse_joins()) 3611 3612 if joins and self._match(TokenType.ON): 3613 kwargs["on"] = self._parse_assignment() 3614 elif joins and self._match(TokenType.USING): 3615 kwargs["using"] = self._parse_using_identifiers() 3616 else: 3617 joins = None 3618 self._retreat(index) 3619 3620 kwargs["this"].set("joins", joins if joins 
    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the option tail of CREATE INDEX (postgres-style).

        Covers, in stream order: USING <method>, the wrapped column list,
        INCLUDE (...), PARTITION BY, WITH (storage options),
        USING INDEX TABLESPACE <name>, WHERE <predicate> and ON <field>.
        """
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        # Peek (advance=False) so the paren is left for the wrapped-CSV parse.
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )
primary = self._match_text_seq("PRIMARY") 3682 amp = self._match_text_seq("AMP") 3683 3684 if not self._match(TokenType.INDEX): 3685 return None 3686 3687 index = self._parse_id_var() 3688 table = None 3689 3690 params = self._parse_index_params() 3691 3692 return self.expression( 3693 exp.Index, 3694 this=index, 3695 table=table, 3696 unique=unique, 3697 primary=primary, 3698 amp=amp, 3699 params=params, 3700 ) 3701 3702 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3703 hints: t.List[exp.Expression] = [] 3704 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3705 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3706 hints.append( 3707 self.expression( 3708 exp.WithTableHint, 3709 expressions=self._parse_csv( 3710 lambda: self._parse_function() or self._parse_var(any_token=True) 3711 ), 3712 ) 3713 ) 3714 self._match_r_paren() 3715 else: 3716 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3717 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3718 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3719 3720 self._match_set((TokenType.INDEX, TokenType.KEY)) 3721 if self._match(TokenType.FOR): 3722 hint.set("target", self._advance_any() and self._prev.text.upper()) 3723 3724 hint.set("expressions", self._parse_wrapped_id_vars()) 3725 hints.append(hint) 3726 3727 return hints or None 3728 3729 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3730 return ( 3731 (not schema and self._parse_function(optional_parens=False)) 3732 or self._parse_id_var(any_token=False) 3733 or self._parse_string_as_identifier() 3734 or self._parse_placeholder() 3735 ) 3736 3737 def _parse_table_parts( 3738 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3739 ) -> exp.Table: 3740 catalog = None 3741 db = None 3742 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3743 3744 while 
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse one table factor of a FROM / JOIN clause.

        Tries, in order: LATERAL, UNNEST, a derived-table VALUES list, a
        subquery, and finally a plain table reference, then attaches any
        trailing modifiers (version, sample, alias, hints, pivots, joins,
        WITH ORDINALITY). The order of modifier parsing below is
        significant and dialect-dependent (e.g. ALIAS_POST_TABLESAMPLE).

        Args:
            schema: parse the table as a schema definition (column list follows).
            joins: also consume any JOIN clauses that follow the table.
            alias_tokens: token types allowed to start a table alias.
            parse_bracket: allow a bracket expression to act as the table.
            is_db_reference: the reference names a database rather than a table.
            parse_partition: allow a PARTITION clause after the table name.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        # Postgres ROWS FROM (func1(), func2(), ...)
        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place the alias before TABLESAMPLE, others after
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) table function, optionally with an alias.

        Handles dialect quirks: UNNEST_COLUMN_ONLY dialects treat the table
        alias as a column alias, and BigQuery-style ``WITH OFFSET`` introduces
        an offset column (named "offset" unless aliased).

        Args:
            with_alias: whether a trailing table alias may follow.
        """
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                # In these dialects, the "table" alias actually names the unnested column
                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The surplus trailing column alias names the ordinality/offset column
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
None 4000 4001 expressions = self._parse_csv(self._parse_value) 4002 alias = self._parse_table_alias() 4003 4004 if is_derived: 4005 self._match_r_paren() 4006 4007 return self.expression( 4008 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4009 ) 4010 4011 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4012 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4013 as_modifier and self._match_text_seq("USING", "SAMPLE") 4014 ): 4015 return None 4016 4017 bucket_numerator = None 4018 bucket_denominator = None 4019 bucket_field = None 4020 percent = None 4021 size = None 4022 seed = None 4023 4024 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4025 matched_l_paren = self._match(TokenType.L_PAREN) 4026 4027 if self.TABLESAMPLE_CSV: 4028 num = None 4029 expressions = self._parse_csv(self._parse_primary) 4030 else: 4031 expressions = None 4032 num = ( 4033 self._parse_factor() 4034 if self._match(TokenType.NUMBER, advance=False) 4035 else self._parse_primary() or self._parse_placeholder() 4036 ) 4037 4038 if self._match_text_seq("BUCKET"): 4039 bucket_numerator = self._parse_number() 4040 self._match_text_seq("OUT", "OF") 4041 bucket_denominator = bucket_denominator = self._parse_number() 4042 self._match(TokenType.ON) 4043 bucket_field = self._parse_field() 4044 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4045 percent = num 4046 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4047 size = num 4048 else: 4049 percent = num 4050 4051 if matched_l_paren: 4052 self._match_r_paren() 4053 4054 if self._match(TokenType.L_PAREN): 4055 method = self._parse_var(upper=True) 4056 seed = self._match(TokenType.COMMA) and self._parse_number() 4057 self._match_r_paren() 4058 elif self._match_texts(("SEED", "REPEATABLE")): 4059 seed = self._parse_wrapped(self._parse_number) 4060 4061 if not method and self.DEFAULT_SAMPLING_METHOD: 4062 method = 
exp.var(self.DEFAULT_SAMPLING_METHOD) 4063 4064 return self.expression( 4065 exp.TableSample, 4066 expressions=expressions, 4067 method=method, 4068 bucket_numerator=bucket_numerator, 4069 bucket_denominator=bucket_denominator, 4070 bucket_field=bucket_field, 4071 percent=percent, 4072 size=size, 4073 seed=seed, 4074 ) 4075 4076 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4077 return list(iter(self._parse_pivot, None)) or None 4078 4079 def _parse_joins(self) -> t.Iterator[exp.Join]: 4080 return iter(self._parse_join, None) 4081 4082 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4083 if not self._match(TokenType.INTO): 4084 return None 4085 4086 return self.expression( 4087 exp.UnpivotColumns, 4088 this=self._match_text_seq("NAME") and self._parse_column(), 4089 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4090 ) 4091 4092 # https://duckdb.org/docs/sql/statements/pivot 4093 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4094 def _parse_on() -> t.Optional[exp.Expression]: 4095 this = self._parse_bitwise() 4096 4097 if self._match(TokenType.IN): 4098 # PIVOT ... ON col IN (row_val1, row_val2) 4099 return self._parse_in(this) 4100 if self._match(TokenType.ALIAS, advance=False): 4101 # UNPIVOT ... 
    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        """Parse the ``<column> IN (...)`` portion of a PIVOT clause.

        Accepts either ``IN (ANY [ORDER BY ...])`` or a comma-separated
        list of pivot values, each optionally aliased with ``AS``.

        Raises a parse error (via raise_error) if ``IN (`` does not follow
        the pivot column.
        """

        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                # A bare single-part column used as an alias is really an identifier
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)
4167 expressions = [] 4168 4169 if not self._match(TokenType.L_PAREN): 4170 self._retreat(index) 4171 return None 4172 4173 if unpivot: 4174 expressions = self._parse_csv(self._parse_column) 4175 else: 4176 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4177 4178 if not expressions: 4179 self.raise_error("Failed to parse PIVOT's aggregation list") 4180 4181 if not self._match(TokenType.FOR): 4182 self.raise_error("Expecting FOR") 4183 4184 field = self._parse_pivot_in() 4185 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4186 self._parse_bitwise 4187 ) 4188 4189 self._match_r_paren() 4190 4191 pivot = self.expression( 4192 exp.Pivot, 4193 expressions=expressions, 4194 field=field, 4195 unpivot=unpivot, 4196 include_nulls=include_nulls, 4197 default_on_null=default_on_null, 4198 ) 4199 4200 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4201 pivot.set("alias", self._parse_table_alias()) 4202 4203 if not unpivot: 4204 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4205 4206 columns: t.List[exp.Expression] = [] 4207 for fld in pivot.args["field"].expressions: 4208 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4209 for name in names: 4210 if self.PREFIXED_PIVOT_COLUMNS: 4211 name = f"{name}_{field_name}" if name else field_name 4212 else: 4213 name = f"{field_name}_{name}" if name else field_name 4214 4215 columns.append(exp.to_identifier(name)) 4216 4217 pivot.set("columns", columns) 4218 4219 return pivot 4220 4221 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4222 return [agg.alias for agg in aggregations] 4223 4224 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4225 if not skip_where_token and not self._match(TokenType.PREWHERE): 4226 return None 4227 4228 return self.expression( 4229 exp.PreWhere, 
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including ALL/DISTINCT, ROLLUP, CUBE,
        GROUPING SETS and ClickHouse's WITH TOTALS.

        Args:
            skip_group_by_token: assume the GROUP BY token was already consumed.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Buckets of parsed sub-clauses, keyed by the exp.Group arg they map to
        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            # If at most a dangling WITH was consumed, give the WITH back and stop
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            # No forward progress at all: the GROUP BY body is exhausted
            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore
False) -> E: 4293 return self.expression( 4294 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4295 ) 4296 4297 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4298 if self._match(TokenType.L_PAREN): 4299 grouping_set = self._parse_csv(self._parse_column) 4300 self._match_r_paren() 4301 return self.expression(exp.Tuple, expressions=grouping_set) 4302 4303 return self._parse_column() 4304 4305 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4306 if not skip_having_token and not self._match(TokenType.HAVING): 4307 return None 4308 return self.expression(exp.Having, this=self._parse_assignment()) 4309 4310 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4311 if not self._match(TokenType.QUALIFY): 4312 return None 4313 return self.expression(exp.Qualify, this=self._parse_assignment()) 4314 4315 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4316 if skip_start_token: 4317 start = None 4318 elif self._match(TokenType.START_WITH): 4319 start = self._parse_assignment() 4320 else: 4321 return None 4322 4323 self._match(TokenType.CONNECT_BY) 4324 nocycle = self._match_text_seq("NOCYCLE") 4325 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4326 exp.Prior, this=self._parse_bitwise() 4327 ) 4328 connect = self._parse_assignment() 4329 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4330 4331 if not start and self._match(TokenType.START_WITH): 4332 start = self._parse_assignment() 4333 4334 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4335 4336 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4337 this = self._parse_id_var(any_token=True) 4338 if self._match(TokenType.ALIAS): 4339 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4340 return this 4341 4342 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4343 if 
    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term:
        ``<expr> [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL ...]``.

        Args:
            parse_method: alternative parser for the ordered expression;
                defaults to ``_parse_assignment``.

        Returns:
            An exp.Ordered node, or ``None`` if no expression could be parsed.
        """
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        # ORDER BY ALL (e.g. DuckDB) is a keyword, not a column reference
        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        # (asc and False) only normalizes desc to False when ASC matched
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # If null ordering wasn't spelled out, infer it from the dialect's
        # NULL_ORDERING setting relative to the sort direction
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            # ClickHouse: ORDER BY ... WITH FILL [FROM ...] [TO ...] [STEP ...] [INTERPOLATE ...]
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse zero or more row-locking clauses: ``FOR UPDATE``,
        ``FOR SHARE`` or MySQL's ``LOCK IN SHARE MODE``, each optionally
        followed by ``OF <tables>`` and a wait policy.

        Returns:
            A (possibly empty) list of exp.Lock nodes.
        """
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is True for NOWAIT, False for SKIP LOCKED,
            # and an expression for WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
t.Type[exp.SetOperation] = exp.Union 4536 elif token_type == TokenType.EXCEPT: 4537 operation = exp.Except 4538 else: 4539 operation = exp.Intersect 4540 4541 comments = self._prev.comments 4542 4543 if self._match(TokenType.DISTINCT): 4544 distinct: t.Optional[bool] = True 4545 elif self._match(TokenType.ALL): 4546 distinct = False 4547 else: 4548 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4549 if distinct is None: 4550 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4551 4552 by_name = self._match_text_seq("BY", "NAME") 4553 expression = self._parse_select(nested=True, parse_set_operation=False) 4554 4555 this = self.expression( 4556 operation, 4557 comments=comments, 4558 this=this, 4559 distinct=distinct, 4560 by_name=by_name, 4561 expression=expression, 4562 ) 4563 4564 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4565 expression = this.expression 4566 4567 if expression: 4568 for arg in self.SET_OP_MODIFIERS: 4569 expr = expression.args.get(arg) 4570 if expr: 4571 this.set(arg, expr.pop()) 4572 4573 return this 4574 4575 def _parse_expression(self) -> t.Optional[exp.Expression]: 4576 return self._parse_alias(self._parse_assignment()) 4577 4578 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4579 this = self._parse_disjunction() 4580 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4581 # This allows us to parse <non-identifier token> := <expr> 4582 this = exp.column( 4583 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4584 ) 4585 4586 while self._match_set(self.ASSIGNMENT): 4587 if isinstance(this, exp.Column) and len(this.parts) == 1: 4588 this = this.this 4589 4590 this = self.expression( 4591 self.ASSIGNMENT[self._prev.token_type], 4592 this=this, 4593 comments=self._prev_comments, 4594 expression=self._parse_assignment(), 4595 ) 4596 4597 return this 4598 4599 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 
    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ... via
        RANGE_PARSERS), plus Postgres' ISNULL/NOTNULL shorthands and a
        trailing IS predicate.

        Args:
            this: left-hand operand; parsed as a bitwise expression if omitted.
        """
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The range parser declined; fall back to the plain operand
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
TokenType.L_BRACKET)): 4681 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4682 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4683 4684 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4685 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4686 else: 4687 this = self.expression(exp.In, this=this, expressions=expressions) 4688 4689 if matched_l_paren: 4690 self._match_r_paren(this) 4691 elif not self._match(TokenType.R_BRACKET, expression=this): 4692 self.raise_error("Expecting ]") 4693 else: 4694 this = self.expression(exp.In, this=this, field=self._parse_column()) 4695 4696 return this 4697 4698 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4699 low = self._parse_bitwise() 4700 self._match(TokenType.AND) 4701 high = self._parse_bitwise() 4702 return self.expression(exp.Between, this=this, low=low, high=high) 4703 4704 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4705 if not self._match(TokenType.ESCAPE): 4706 return this 4707 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4708 4709 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4710 index = self._index 4711 4712 if not self._match(TokenType.INTERVAL) and match_interval: 4713 return None 4714 4715 if self._match(TokenType.STRING, advance=False): 4716 this = self._parse_primary() 4717 else: 4718 this = self._parse_term() 4719 4720 if not this or ( 4721 isinstance(this, exp.Column) 4722 and not this.table 4723 and not this.this.quoted 4724 and this.name.upper() == "IS" 4725 ): 4726 self._retreat(index) 4727 return None 4728 4729 unit = self._parse_function() or ( 4730 not self._match(TokenType.ALIAS, advance=False) 4731 and self._parse_var(any_token=True, upper=True) 4732 ) 4733 4734 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of bitwise-precedence operators.

        Besides the operators in self.BITWISE, this also handles
        dialect-specific operators at the same precedence: ``||`` as string
        concatenation, ``??`` as COALESCE, and ``<<`` / ``>>`` shifts
        (which arrive as two adjacent comparison tokens).
        """
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # x ?? y  ->  COALESCE(x, y)
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
self._match_set(self.UNARY_PARSERS): 4849 return self.UNARY_PARSERS[self._prev.token_type](self) 4850 return self._parse_at_time_zone(self._parse_type()) 4851 4852 def _parse_type( 4853 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4854 ) -> t.Optional[exp.Expression]: 4855 interval = parse_interval and self._parse_interval() 4856 if interval: 4857 return interval 4858 4859 index = self._index 4860 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4861 4862 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4863 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4864 if isinstance(data_type, exp.Cast): 4865 # This constructor can contain ops directly after it, for instance struct unnesting: 4866 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4867 return self._parse_column_ops(data_type) 4868 4869 if data_type: 4870 index2 = self._index 4871 this = self._parse_primary() 4872 4873 if isinstance(this, exp.Literal): 4874 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4875 if parser: 4876 return parser(self, this, data_type) 4877 4878 return self.expression(exp.Cast, this=this, to=data_type) 4879 4880 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4881 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4882 # 4883 # If the index difference here is greater than 1, that means the parser itself must have 4884 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4885 # 4886 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4887 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4888 # callable in the TYPE_CONVERTERS mapping. 
For example, Snowflake converts DECIMAL to 4889 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4890 # 4891 # In these cases, we don't really want to return the converted type, but instead retreat 4892 # and try to parse a Column or Identifier in the section below. 4893 if data_type.expressions and index2 - index > 1: 4894 self._retreat(index2) 4895 return self._parse_column_ops(data_type) 4896 4897 self._retreat(index) 4898 4899 if fallback_to_identifier: 4900 return self._parse_id_var() 4901 4902 this = self._parse_column() 4903 return this and self._parse_column_ops(this) 4904 4905 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4906 this = self._parse_type() 4907 if not this: 4908 return None 4909 4910 if isinstance(this, exp.Column) and not this.table: 4911 this = exp.var(this.name.upper()) 4912 4913 return self.expression( 4914 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4915 ) 4916 4917 def _parse_types( 4918 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4919 ) -> t.Optional[exp.Expression]: 4920 index = self._index 4921 4922 this: t.Optional[exp.Expression] = None 4923 prefix = self._match_text_seq("SYSUDTLIB", ".") 4924 4925 if not self._match_set(self.TYPE_TOKENS): 4926 identifier = allow_identifiers and self._parse_id_var( 4927 any_token=False, tokens=(TokenType.VAR,) 4928 ) 4929 if isinstance(identifier, exp.Identifier): 4930 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4931 4932 if len(tokens) != 1: 4933 self.raise_error("Unexpected identifier", self._prev) 4934 4935 if tokens[0].token_type in self.TYPE_TOKENS: 4936 self._prev = tokens[0] 4937 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4938 type_name = identifier.name 4939 4940 while self._match(TokenType.DOT): 4941 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4942 4943 this = exp.DataType.build(type_name, udt=True) 4944 else: 4945 
                    # Not a type after all -- unconsume the identifier
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        # Parse the parenthesized parameter list, whose shape depends on the type kind
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    # Nullable(T) is represented as T with nullable=True, not as a wrapper type
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracket parameterization, e.g. ARRAY<INT> or STRUCT<a INT>
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Inline constructor values, e.g. ARRAY<INT>[1, 2] or STRUCT<a INT>(1)
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            # If a string follows the parenthesized args, this was a function call
            # (e.g. DATE(...)), not a data type -- undo everything
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                # Inline constructor values are canonicalized to CAST(<values> AS <type>)
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            # Dialect-specific canonicalization hook (e.g. adding default precision/scale)
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: an optional field name followed by its type (as a column def)."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token.
            # Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            # A type was required but we only got a name with nothing type-like after it;
            # re-parse the whole field as a type instead
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone if an AT TIME ZONE clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference together with any trailing column operators."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            # Oracle-style outer join marker, e.g. col(+)
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a field and wrap a bare Identifier into a Column."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by ( is an identifier here, not the VALUES clause keyword
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `col:path.to.key` VARIANT extraction into JSONExtract."""
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of a `::` cast -- a data type."""
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (., ::, brackets, etc.) to `this` in a loop."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, (exp.Func, exp.Window)) and this:
                # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift name parts: what we parsed as a column was really a table/db/catalog prefix
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, leading-dot number, or a parenthesized expression/query."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are implicitly concatenated, e.g. 'a' 'b'
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # .5 is parsed as the literal 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier/var, in priority order.

        The `anonymous_func` flag flips the primary-vs-function priority so that
        function-looking input is parsed as a function first.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation into a known Func expression or an Anonymous one.

        Args:
            functions: Name -> builder overrides; defaults to `self.FUNCTIONS`.
            anonymous: Always build exp.Anonymous, ignoring known builders/parsers.
            optional_parens: Allow functions that take no parentheses (e.g. CURRENT_DATE).
            any_token: Permit any non-reserved token as the function name.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)

                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        # Hook for dialects to transform a positional arg into a PropertyEQ; no-op by default
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style function args (aliases, equalities) into PropertyEQ nodes."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        """Parse the body of a user-defined function (a full statement by default)."""
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single UDF parameter as a column definition (name + optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly-qualified UDF name with an optional parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); falls back to an Identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda argument (an identifier by default)."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. x -> x + 1); otherwise parse a regular expression/aggregate body."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all -- rewind and parse a normal expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to `this`."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g.
in INSERT INTO table (<expr>), 5623 # expr can be of both types 5624 if self._match_set(self.SELECT_START_TOKENS): 5625 self._retreat(index) 5626 return this 5627 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5628 self._match_r_paren() 5629 return self.expression(exp.Schema, this=this, expressions=args) 5630 5631 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5632 return self._parse_column_def(self._parse_field(any_token=True)) 5633 5634 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5635 # column defs are not really columns, they're identifiers 5636 if isinstance(this, exp.Column): 5637 this = this.this 5638 5639 kind = self._parse_types(schema=True) 5640 5641 if self._match_text_seq("FOR", "ORDINALITY"): 5642 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5643 5644 constraints: t.List[exp.Expression] = [] 5645 5646 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5647 ("ALIAS", "MATERIALIZED") 5648 ): 5649 persisted = self._prev.text.upper() == "MATERIALIZED" 5650 constraint_kind = exp.ComputedColumnConstraint( 5651 this=self._parse_assignment(), 5652 persisted=persisted or self._match_text_seq("PERSISTED"), 5653 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5654 ) 5655 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5656 elif ( 5657 kind 5658 and self._match(TokenType.ALIAS, advance=False) 5659 and ( 5660 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5661 or (self._next and self._next.token_type == TokenType.L_PAREN) 5662 ) 5663 ): 5664 self._advance() 5665 constraints.append( 5666 self.expression( 5667 exp.ColumnConstraint, 5668 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5669 ) 5670 ) 5671 5672 while True: 5673 constraint = self._parse_column_constraint() 5674 if not constraint: 5675 break 5676 constraints.append(constraint) 5677 5678 if not kind and not 
constraints: 5679 return this 5680 5681 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5682 5683 def _parse_auto_increment( 5684 self, 5685 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5686 start = None 5687 increment = None 5688 5689 if self._match(TokenType.L_PAREN, advance=False): 5690 args = self._parse_wrapped_csv(self._parse_bitwise) 5691 start = seq_get(args, 0) 5692 increment = seq_get(args, 1) 5693 elif self._match_text_seq("START"): 5694 start = self._parse_bitwise() 5695 self._match_text_seq("INCREMENT") 5696 increment = self._parse_bitwise() 5697 5698 if start and increment: 5699 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5700 5701 return exp.AutoIncrementColumnConstraint() 5702 5703 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5704 if not self._match_text_seq("REFRESH"): 5705 self._retreat(self._index - 1) 5706 return None 5707 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5708 5709 def _parse_compress(self) -> exp.CompressColumnConstraint: 5710 if self._match(TokenType.L_PAREN, advance=False): 5711 return self.expression( 5712 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5713 ) 5714 5715 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5716 5717 def _parse_generated_as_identity( 5718 self, 5719 ) -> ( 5720 exp.GeneratedAsIdentityColumnConstraint 5721 | exp.ComputedColumnConstraint 5722 | exp.GeneratedAsRowColumnConstraint 5723 ): 5724 if self._match_text_seq("BY", "DEFAULT"): 5725 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5726 this = self.expression( 5727 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5728 ) 5729 else: 5730 self._match_text_seq("ALWAYS") 5731 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5732 5733 self._match(TokenType.ALIAS) 5734 
        # GENERATED ... AS ROW {START | END} [HIDDEN] — row start/end markers.
        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        # Optional parenthesized identity options: start/increment/min/max/cycle.
        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>) — a computed expression, not identity options.
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric list shorthand: IDENTITY(start, increment).
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <expr> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """
        Parse the tail of a NOT-prefixed constraint: NOT NULL, NOT CASESPECIFIC, or
        NOT FOR REPLICATION. Returns None (and gives the NOT token back) on no match.
        """
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> 
t.Optional[exp.Expression]:
        # Optional name: CONSTRAINT <id> preceding the constraint kind.
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        # Guard: a WITH followed by a procedure option must not be consumed as a
        # constraint kind (WITH can start both).
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            # Dispatch to the kind-specific parser keyed by the matched keyword.
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint, named (CONSTRAINT <id> ...) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or function-shaped ones) until none match."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """
        Parse one unnamed constraint whose leading keyword is in `constraints`
        (defaults to all of CONSTRAINT_PARSERS). An identifier token is never
        treated as a constraint keyword.
        """
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [NULLS NOT DISTINCT] (<cols>) [USING <type>] [ON CONFLICT ...]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5846 this=self._parse_schema(self._parse_unique_key()), 5847 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5848 on_conflict=self._parse_on_conflict(), 5849 ) 5850 5851 def _parse_key_constraint_options(self) -> t.List[str]: 5852 options = [] 5853 while True: 5854 if not self._curr: 5855 break 5856 5857 if self._match(TokenType.ON): 5858 action = None 5859 on = self._advance_any() and self._prev.text 5860 5861 if self._match_text_seq("NO", "ACTION"): 5862 action = "NO ACTION" 5863 elif self._match_text_seq("CASCADE"): 5864 action = "CASCADE" 5865 elif self._match_text_seq("RESTRICT"): 5866 action = "RESTRICT" 5867 elif self._match_pair(TokenType.SET, TokenType.NULL): 5868 action = "SET NULL" 5869 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5870 action = "SET DEFAULT" 5871 else: 5872 self.raise_error("Invalid key constraint") 5873 5874 options.append(f"ON {on} {action}") 5875 else: 5876 var = self._parse_var_from_options( 5877 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5878 ) 5879 if not var: 5880 break 5881 options.append(var.name) 5882 5883 return options 5884 5885 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5886 if match and not self._match(TokenType.REFERENCES): 5887 return None 5888 5889 expressions = None 5890 this = self._parse_table(schema=True) 5891 options = self._parse_key_constraint_options() 5892 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5893 5894 def _parse_foreign_key(self) -> exp.ForeignKey: 5895 expressions = self._parse_wrapped_id_vars() 5896 reference = self._parse_references() 5897 options = {} 5898 5899 while self._match(TokenType.ON): 5900 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5901 self.raise_error("Expected DELETE or UPDATE") 5902 5903 kind = self._prev.text.lower() 5904 5905 if self._match_text_seq("NO", "ACTION"): 5906 
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                # ON {DELETE | UPDATE} SET {NULL | DEFAULT}
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single keyword (e.g. CASCADE, RESTRICT) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        # A key part may carry an ordering (ASC/DESC) or be a plain field.
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (<start_col>, <end_col>); retreats on mismatch."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """
        Parse PRIMARY KEY, either as a column constraint (no column list follows) or
        as a table-level key with a wrapped column list plus key options.
        """
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        # One bracket element: assignment, optionally aliased, optionally sliced (a[b:c]).
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. 
We parse the column into the corresponding 5961 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5962 same as we did for `DATE('yyyy-mm-dd')`. 5963 5964 Reference: 5965 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5966 """ 5967 self._match(TokenType.VAR) 5968 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5969 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5970 if not self._match(TokenType.R_BRACE): 5971 self.raise_error("Expected }") 5972 return expression 5973 5974 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5975 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5976 return this 5977 5978 bracket_kind = self._prev.token_type 5979 if ( 5980 bracket_kind == TokenType.L_BRACE 5981 and self._curr 5982 and self._curr.token_type == TokenType.VAR 5983 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5984 ): 5985 return self._parse_odbc_datetime_literal() 5986 5987 expressions = self._parse_csv( 5988 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5989 ) 5990 5991 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5992 self.raise_error("Expected ]") 5993 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5994 self.raise_error("Expected }") 5995 5996 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5997 if bracket_kind == TokenType.L_BRACE: 5998 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5999 elif not this: 6000 this = build_array_constructor( 6001 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6002 ) 6003 else: 6004 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6005 if constructor_type: 6006 return build_array_constructor( 6007 constructor_type, 6008 
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Plain subscript: a[b] — apply the dialect's index offset to the indices.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Brackets can chain: a[b][c].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional slice suffix `: <expr>` onto `this`."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple" CASE form; None for the searched form.
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # Edge case: `ELSE interval END` where END was swallowed as the interval's
            # unit — recover by treating the default as the column "interval".
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either function-style IF(cond, t, f) or statement-style IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            # At statement start, some dialects treat IF as a command to pass through.
            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = 
self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (<order>)]; retreats on mismatch."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(<unit> FROM <expr>), also accepting a comma as the separator."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        # NOTE(review): raise_error may not abort under lenient error settings — confirm;
        # parsing continues here so a best-effort Extract is still produced.
        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL([TABLE] <table>, <args...>)."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """
        Parse the body of CAST/TRY_CAST: <expr> AS <type> [FORMAT <fmt> [AT TIME ZONE ...]].

        `strict` selects exp.Cast vs exp.TryCast; `safe` is forwarded to the node.
        A comma instead of AS yields exp.CastToStrType.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            # A FORMAT on a temporal target type is really a string-to-date/time parse.
            if to.this in 
exp.DataType.TEMPORAL_TYPES: 6126 this = self.expression( 6127 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6128 this=this, 6129 format=exp.Literal.string( 6130 format_time( 6131 fmt_string.this if fmt_string else "", 6132 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6133 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6134 ) 6135 ), 6136 safe=safe, 6137 ) 6138 6139 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6140 this.set("zone", fmt.args["zone"]) 6141 return this 6142 elif not to: 6143 self.raise_error("Expected TYPE after CAST") 6144 elif isinstance(to, exp.Identifier): 6145 to = exp.DataType.build(to.name, udt=True) 6146 elif to.this == exp.DataType.Type.CHAR: 6147 if self._match(TokenType.CHARACTER_SET): 6148 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6149 6150 return self.expression( 6151 exp.Cast if strict else exp.TryCast, 6152 this=this, 6153 to=to, 6154 format=fmt, 6155 safe=safe, 6156 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6157 ) 6158 6159 def _parse_string_agg(self) -> exp.GroupConcat: 6160 if self._match(TokenType.DISTINCT): 6161 args: t.List[t.Optional[exp.Expression]] = [ 6162 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6163 ] 6164 if self._match(TokenType.COMMA): 6165 args.extend(self._parse_csv(self._parse_assignment)) 6166 else: 6167 args = self._parse_csv(self._parse_assignment) # type: ignore 6168 6169 if self._match_text_seq("ON", "OVERFLOW"): 6170 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6171 if self._match_text_seq("ERROR"): 6172 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6173 else: 6174 self._match_text_seq("TRUNCATE") 6175 on_overflow = self.expression( 6176 exp.OverflowTruncateBehavior, 6177 this=self._parse_string(), 6178 with_count=( 6179 self._match_text_seq("WITH", "COUNT") 6180 or not self._match_text_seq("WITHOUT", 
"COUNT") 6181 ), 6182 ) 6183 else: 6184 on_overflow = None 6185 6186 index = self._index 6187 if not self._match(TokenType.R_PAREN) and args: 6188 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6189 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6190 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6191 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6192 6193 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6194 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6195 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6196 if not self._match_text_seq("WITHIN", "GROUP"): 6197 self._retreat(index) 6198 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6199 6200 # The corresponding match_r_paren will be called in parse_function (caller) 6201 self._match_l_paren() 6202 6203 return self.expression( 6204 exp.GroupConcat, 6205 this=self._parse_order(this=seq_get(args, 0)), 6206 separator=seq_get(args, 1), 6207 on_overflow=on_overflow, 6208 ) 6209 6210 def _parse_convert( 6211 self, strict: bool, safe: t.Optional[bool] = None 6212 ) -> t.Optional[exp.Expression]: 6213 this = self._parse_bitwise() 6214 6215 if self._match(TokenType.USING): 6216 to: t.Optional[exp.Expression] = self.expression( 6217 exp.CharacterSet, this=self._parse_var() 6218 ) 6219 elif self._match(TokenType.COMMA): 6220 to = self._parse_types() 6221 else: 6222 to = None 6223 6224 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6225 6226 def _parse_xml_table(self) -> exp.XMLTable: 6227 namespaces = None 6228 passing = None 6229 columns = None 6230 6231 if self._match_text_seq("XMLNAMESPACES", "("): 6232 
namespaces = self._parse_xml_namespace() 6233 self._match_text_seq(")", ",") 6234 6235 this = self._parse_string() 6236 6237 if self._match_text_seq("PASSING"): 6238 # The BY VALUE keywords are optional and are provided for semantic clarity 6239 self._match_text_seq("BY", "VALUE") 6240 passing = self._parse_csv(self._parse_column) 6241 6242 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6243 6244 if self._match_text_seq("COLUMNS"): 6245 columns = self._parse_csv(self._parse_field_def) 6246 6247 return self.expression( 6248 exp.XMLTable, 6249 this=this, 6250 namespaces=namespaces, 6251 passing=passing, 6252 columns=columns, 6253 by_ref=by_ref, 6254 ) 6255 6256 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6257 namespaces = [] 6258 6259 while True: 6260 if self._match_text_seq("DEFAULT"): 6261 uri = self._parse_string() 6262 else: 6263 uri = self._parse_alias(self._parse_string()) 6264 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6265 if not self._match(TokenType.COMMA): 6266 break 6267 6268 return namespaces 6269 6270 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6271 """ 6272 There are generally two variants of the DECODE function: 6273 6274 - DECODE(bin, charset) 6275 - DECODE(expression, search, result [, search, result] ... [, default]) 6276 6277 The second variant will always be parsed into a CASE expression. Note that NULL 6278 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6279 instead of relying on pattern matching. 
        """
        args = self._parse_csv(self._parse_assignment)

        # Fewer than 3 args: the (bin, charset) variant.
        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Remaining args alternate search/result; an odd leftover is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search values must compare with IS NULL, not equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR on both sides being NULL,
                # mirroring DECODE's NULL-equals-NULL semantics.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one [KEY] <key> {: | VALUE | =>} [VALUE] <value> pair for JSON constructors."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.FormatJson when followed by FORMAT JSON; otherwise pass through."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6336 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6337 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6338 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6339 else: 6340 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6341 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6342 6343 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6344 6345 if not empty and not error and not null: 6346 return None 6347 6348 return self.expression( 6349 exp.OnCondition, 6350 empty=empty, 6351 error=error, 6352 null=null, 6353 ) 6354 6355 def _parse_on_handling( 6356 self, on: str, *values: str 6357 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6358 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6359 for value in values: 6360 if self._match_text_seq(value, "ON", on): 6361 return f"{value} ON {on}" 6362 6363 index = self._index 6364 if self._match(TokenType.DEFAULT): 6365 default_value = self._parse_bitwise() 6366 if self._match_text_seq("ON", on): 6367 return default_value 6368 6369 self._retreat(index) 6370 6371 return None 6372 6373 @t.overload 6374 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6375 6376 @t.overload 6377 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6378 6379 def _parse_json_object(self, agg=False): 6380 star = self._parse_star() 6381 expressions = ( 6382 [star] 6383 if star 6384 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6385 ) 6386 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6387 6388 unique_keys = None 6389 if self._match_text_seq("WITH", "UNIQUE"): 6390 unique_keys = True 6391 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6392 unique_keys = False 6393 6394 self._match_text_seq("KEYS") 6395 6396 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6397 self._parse_type() 6398 ) 6399 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6400 6401 return self.expression( 6402 exp.JSONObjectAgg if agg else exp.JSONObject, 6403 expressions=expressions, 6404 null_handling=null_handling, 6405 unique_keys=unique_keys, 6406 return_type=return_type, 6407 encoding=encoding, 6408 ) 6409 6410 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6411 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6412 if not self._match_text_seq("NESTED"): 6413 this = self._parse_id_var() 6414 kind = self._parse_types(allow_identifiers=False) 6415 nested = None 6416 else: 6417 this = None 6418 kind = None 6419 nested = True 6420 6421 path = self._match_text_seq("PATH") and self._parse_string() 6422 nested_schema = nested and self._parse_json_schema() 6423 6424 return self.expression( 6425 exp.JSONColumnDef, 6426 this=this, 6427 kind=kind, 6428 path=path, 6429 nested_schema=nested_schema, 6430 ) 6431 6432 def _parse_json_schema(self) -> exp.JSONSchema: 6433 self._match_text_seq("COLUMNS") 6434 return self.expression( 6435 exp.JSONSchema, 6436 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6437 ) 6438 6439 def _parse_json_table(self) -> exp.JSONTable: 6440 this = self._parse_format_json(self._parse_bitwise()) 6441 path = self._match(TokenType.COMMA) and 
self._parse_string() 6442 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6443 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6444 schema = self._parse_json_schema() 6445 6446 return exp.JSONTable( 6447 this=this, 6448 schema=schema, 6449 path=path, 6450 error_handling=error_handling, 6451 empty_handling=empty_handling, 6452 ) 6453 6454 def _parse_match_against(self) -> exp.MatchAgainst: 6455 expressions = self._parse_csv(self._parse_column) 6456 6457 self._match_text_seq(")", "AGAINST", "(") 6458 6459 this = self._parse_string() 6460 6461 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6462 modifier = "IN NATURAL LANGUAGE MODE" 6463 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6464 modifier = f"{modifier} WITH QUERY EXPANSION" 6465 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6466 modifier = "IN BOOLEAN MODE" 6467 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6468 modifier = "WITH QUERY EXPANSION" 6469 else: 6470 modifier = None 6471 6472 return self.expression( 6473 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6474 ) 6475 6476 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6477 def _parse_open_json(self) -> exp.OpenJSON: 6478 this = self._parse_bitwise() 6479 path = self._match(TokenType.COMMA) and self._parse_string() 6480 6481 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6482 this = self._parse_field(any_token=True) 6483 kind = self._parse_types() 6484 path = self._parse_string() 6485 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6486 6487 return self.expression( 6488 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6489 ) 6490 6491 expressions = None 6492 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6493 self._match_l_paren() 6494 expressions = self._parse_csv(_parse_open_json_column_def) 6495 6496 return self.expression(exp.OpenJSON, 
this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """
        Parse POSITION/LOCATE-style arguments.

        Handles both `POSITION(<needle> IN <haystack>)` and the comma form, where
        `haystack_first` controls which comma-argument is the haystack.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT([MODEL] <model>, [TABLE] <table> [, <params_struct>])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list into exp.JoinHint under the uppercased hint name."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                # FOR without FROM: default the start position to 1.
                if len(args) == 1:
                    args.append(exp.Literal.number(1))
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        # Optional trim style (e.g. LEADING | TRAILING | BOTH), stored uppercased.
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        # TRIM(<pattern> FROM <target>): FROM (or a comma, when the dialect sets
        # TRIM_PATTERN_FIRST) means the first operand was the pattern, so swap.
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW <name> AS (...), ... — falsy when the WINDOW keyword is absent.
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        # A single `<name> AS (<spec>)` entry of a WINDOW clause.
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Wraps `this` when an IGNORE/RESPECT NULLS suffix follows; otherwise no-op.
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # HAVING MAX <col> / HAVING MIN <col> (e.g. ClickHouse-style aggregate filter).
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            # `max` is True unless MIN was explicitly given (HAVING alone => MAX).
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the windowing suffix of a function call: WITHIN GROUP, FILTER,
        IGNORE/RESPECT NULLS and the OVER (...) specification itself. With
        `alias=True`, parses a named window (`name AS (...)`) instead of OVER."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper from inside the aggregate
                # to wrap the whole aggregate, normalizing both input formats.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER keyword: this is not a window expression after all.
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <window_name> — reference to a named window, no inline spec.
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # Oracle FIRST/LAST within the window spec; LAST simply negates FIRST.
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS|RANGE [BETWEEN] <start> [AND <end>] frame specification.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        # PARTITION BY ... followed by ORDER BY ..., either of which may be absent.
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame endpoint: UNBOUNDED / CURRENT ROW / <expr>,
        optionally followed by a side keyword (e.g. PRECEDING, FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias ([AS] name, or a parenthesized alias list).
        With `explicit=True`, only accepts the alias if AS was present."""
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # (a, b, c) multi-alias form.
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier; with `any_token`, any non-reserved token is accepted
        and wrapped in an Identifier node."""
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        # Dispatches on the dialect's string token parsers; placeholder fallback.
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # 'name' used where an identifier is expected (dialects with string aliases).
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or, optionally, any token / one of `tokens`) into a Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consume and return the current token unless it is reserved.
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The dialect parser declined; undo the token we consumed above.
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse star modifiers like EXCEPT/REPLACE: KEYWORD (a, b) or KEYWORD a."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list with `parse_method`, skipping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments hanging on the separator to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-associative fold: parse operands with `parse_method` joined by any
        operator token in `expressions` (token type -> expression class)."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Parse `( ... )`; with `optional=True` the parentheses may be omitted.
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # SELECT as it appears in DDL (e.g. CREATE TABLE ... AS <select>).
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            # Each mode is a run of VAR tokens (e.g. "ISOLATION LEVEL ...").
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse the tail of COMMIT/ROLLBACK: [TRANSACTION|WORK]
        [TO [SAVEPOINT] name] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        # DROP defaults to dropping a COLUMN when no kind was specified.
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD (constraints, columns, schema)."""
        # The ADD keyword itself was already consumed; remember where it was.
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ALTER [COLUMN] actions (drop/set default,
        comment, [drop/set] not null, or a type change)."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fallback: [SET DATA] [TYPE] <type> [COLLATE ...] [USING ...].
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        # Redshift: DISTSTYLE ALL | EVEN | AUTO | KEY DISTKEY <col>.
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        # Redshift: [COMPOUND] SORTKEY (cols) | AUTO | NONE.
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        # The DROP keyword was already consumed; remember where it was.
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        """Parse RENAME [COLUMN old TO new | TO new_table]."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the many ALTER ... SET variants into a single AlterSet node."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Fallback: [SERDE name] followed by free-form properties.
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse ALTER <alterable> statements; anything not fully consumed
        falls back to an opaque Command."""
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only build an Alter node if the whole statement was consumed.
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        """Parse ANALYZE statements across dialects (DuckDB, Presto, StarRocks,
        MySQL, Oracle, ...)."""
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            # index-2 points back at the SYNC/ASYNC token just matched.
            mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
properties=properties, 7242 expression=inner_expression, 7243 options=options, 7244 ) 7245 7246 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7247 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7248 this = None 7249 kind = self._prev.text.upper() 7250 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7251 expressions = [] 7252 7253 if not self._match_text_seq("STATISTICS"): 7254 self.raise_error("Expecting token STATISTICS") 7255 7256 if self._match_text_seq("NOSCAN"): 7257 this = "NOSCAN" 7258 elif self._match(TokenType.FOR): 7259 if self._match_text_seq("ALL", "COLUMNS"): 7260 this = "FOR ALL COLUMNS" 7261 if self._match_texts("COLUMNS"): 7262 this = "FOR COLUMNS" 7263 expressions = self._parse_csv(self._parse_column_reference) 7264 elif self._match_text_seq("SAMPLE"): 7265 sample = self._parse_number() 7266 expressions = [ 7267 self.expression( 7268 exp.AnalyzeSample, 7269 sample=sample, 7270 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7271 ) 7272 ] 7273 7274 return self.expression( 7275 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7276 ) 7277 7278 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7279 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7280 kind = None 7281 this = None 7282 expression: t.Optional[exp.Expression] = None 7283 if self._match_text_seq("REF", "UPDATE"): 7284 kind = "REF" 7285 this = "UPDATE" 7286 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7287 this = "UPDATE SET DANGLING TO NULL" 7288 elif self._match_text_seq("STRUCTURE"): 7289 kind = "STRUCTURE" 7290 if self._match_text_seq("CASCADE", "FAST"): 7291 this = "CASCADE FAST" 7292 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7293 ("ONLINE", "OFFLINE") 7294 ): 7295 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7296 expression = self._parse_into() 7297 7298 return 
self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7299 7300 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7301 this = self._prev.text.upper() 7302 if self._match_text_seq("COLUMNS"): 7303 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7304 return None 7305 7306 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7307 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7308 if self._match_text_seq("STATISTICS"): 7309 return self.expression(exp.AnalyzeDelete, kind=kind) 7310 return None 7311 7312 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7313 if self._match_text_seq("CHAINED", "ROWS"): 7314 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7315 return None 7316 7317 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7318 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7319 this = self._prev.text.upper() 7320 expression: t.Optional[exp.Expression] = None 7321 expressions = [] 7322 update_options = None 7323 7324 if self._match_text_seq("HISTOGRAM", "ON"): 7325 expressions = self._parse_csv(self._parse_column_reference) 7326 with_expressions = [] 7327 while self._match(TokenType.WITH): 7328 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7329 if self._match_texts(("SYNC", "ASYNC")): 7330 if self._match_text_seq("MODE", advance=False): 7331 with_expressions.append(f"{self._prev.text.upper()} MODE") 7332 self._advance() 7333 else: 7334 buckets = self._parse_number() 7335 if self._match_text_seq("BUCKETS"): 7336 with_expressions.append(f"{buckets} BUCKETS") 7337 if with_expressions: 7338 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7339 7340 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7341 TokenType.UPDATE, advance=False 7342 ): 7343 update_options = self._prev.text.upper() 7344 
                self._advance()
        elif self._match_text_seq("USING", "DATA"):
            expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] target USING source ON condition WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> exp.Whens:
        """Parse a MERGE statement's WHEN [NOT] MATCHED [BY SOURCE|TARGET]
        [AND cond] THEN <insert|update|delete|action> clauses."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, False otherwise.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        # Dialect-specific SHOW parsers; otherwise keep the statement opaque.
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single SET item: `name = value` or `name TO value`."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            # Bare names on the right-hand side are values, not column refs.
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        # SET [GLOBAL|SESSION] TRANSACTION <characteristic>, ...
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        # Leftover tokens mean we failed to parse the full SET; emit a Command.
        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option keyword described by `options`,
        a mapping of first word -> allowed keyword continuations."""
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched; unknown first word has continuations None.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Consume the rest of the statement and keep it as raw SQL text.
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse dictionary properties like KIND(key1 val1 key2 val2 ...)."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        # NOTE(review): tail of _parse_dict_property — its `def` line lies above this
        # chunk. Closes the property's parenthesized body and builds the node.
        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parses a dictionary range clause: `([MIN <value>] MAX <value>)`.

        When MIN is omitted, it defaults to the literal number 0.
        """
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parses `<expr> IN <iterator> [IF <condition>]` into a Comprehension.

        If the IN keyword does not follow the first expression, the parser
        backtracks to where it started and returns None.
        """
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Backtrack: this was not a comprehension after all.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parses a dollar-quoted (heredoc) string: `$...$` or `$tag$...$tag$`.

        Returns None if the next token does not start a heredoc. The scan
        advances until the closing tag sequence is found, or raises an error
        if the input ends first.
        """
        if self._match(TokenType.HEREDOC_STRING):
            # The tokenizer already produced a single heredoc token.
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            # The token immediately adjacent to `$` is either the tag name or
            # the closing `$` of an untagged heredoc.
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            # Tagged form: expect the `$` that terminates the opening tag.
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                # Recover the raw SQL between the opening and closing tags.
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Looks up a sub-parser keyed by the upcoming (possibly multi-word)
        keyword sequence, using a trie for longest-match search.

        On a miss the token position is restored and None is returned.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Returns True if the current token has `token_type`, else None.

        With `advance=True` the parser moves past the matched token, and any
        comments attached to it are transferred onto `expression` if given.
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Returns True if the current token's type is in `types`, else None."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Returns True if the next two tokens match the given pair, else None."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consumes a required `(`, raising a parse error if it is missing."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consumes a required `)`, raising a parse error if it is missing."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Returns True if the current token's upper-cased text is in `texts`.

        STRING tokens are excluded so quoted literals never match keywords.
        """
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        """Matches a consecutive sequence of keyword texts (case-insensitive).

        On a partial match the parser retreats to its starting position and
        returns None. With `advance=False` the position is restored even on
        success, making this a pure lookahead.
        """
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrites columns in `node` that refer to lambda parameters.

        `expressions` are the lambda's parameters; each maps to its cast
        target type (the "to" arg) or False when untyped. Matching columns
        are replaced by a plain identifier / dot path, wrapped in a Cast when
        a target type is present.
        """
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Replace the outermost Dot chain that contains the column.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parses TRUNCATE [TABLE | DATABASE] with its optional clauses, or
        falls back to the TRUNCATE(...) function / a raw Command."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            # Back up past TRUNCATE and `(` so it reparses as a function call.
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parses `<ordered expr> [WITH <operator>]`."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parses a parenthesized, optionally comma-separated list of
        `key = value` properties, e.g. Snowflake FILE_FORMAT options."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts
    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parses the option list of a COPY statement until the closing `)`.

        Handles dialect quirks: options may be separated by whitespace, `=`
        or AS, and some options carry a parenthesized sub-option list.
        """
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parses the credential-related clauses of a COPY statement
        (STORAGE_INTEGRATION, CREDENTIALS, ENCRYPTION, IAM_ROLE, REGION)."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Hook for dialects to override how a COPY file location is parsed.
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parses COPY [INTO] <target> FROM/TO <files> with credentials and
        parameters, falling back to a raw Command if tokens remain."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # True for COPY ... FROM, False for COPY ... TO.
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        """Parses NORMALIZE(<expr> [, <form>])."""
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        """Parses CEIL/FLOOR arguments: `(<expr> [, <decimals>]) [TO <unit>]`."""
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        """Parses `*` with optional EXCEPT/EXCLUDE, REPLACE and RENAME
        modifiers, or a COLUMNS(...) expression marked for unpacking."""
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        """Parses one privilege of a GRANT, e.g. `SELECT (col1, col2)`."""
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        """Parses a grantee: `[ROLE | GROUP] <identifier>`."""
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        """Parses GRANT <privileges> ON <securable> TO <principals>,
        falling back to a raw Command when the statement can't be modeled."""
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            # Fallback: leftover tokens mean we couldn't fully parse the GRANT.
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        """Parses OVERLAY(<this> PLACING <expr> FROM <pos> [FOR <len>])."""
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """Initializes the parser with error-handling settings and a dialect.

        Args:
            error_level: The desired error level; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of query context to show
                in error messages.
            max_errors: Maximum number of errors included in a raised ParseError.
            dialect: The SQL dialect (name, instance or class) to parse with.
        """
        # Local import; presumably avoids a circular module dependency — confirm.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
1476 def parse( 1477 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1478 ) -> t.List[t.Optional[exp.Expression]]: 1479 """ 1480 Parses a list of tokens and returns a list of syntax trees, one tree 1481 per parsed SQL statement. 1482 1483 Args: 1484 raw_tokens: The list of tokens. 1485 sql: The original SQL string, used to produce helpful debug messages. 1486 1487 Returns: 1488 The list of the produced syntax trees. 1489 """ 1490 return self._parse( 1491 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1492 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1494 def parse_into( 1495 self, 1496 expression_types: exp.IntoType, 1497 raw_tokens: t.List[Token], 1498 sql: t.Optional[str] = None, 1499 ) -> t.List[t.Optional[exp.Expression]]: 1500 """ 1501 Parses a list of tokens into a given Expression type. If a collection of Expression 1502 types is given instead, this method will try to parse the token list into each one 1503 of them, stopping at the first for which the parsing succeeds. 1504 1505 Args: 1506 expression_types: The expression type(s) to try and parse the token list into. 1507 raw_tokens: The list of tokens. 1508 sql: The original SQL string, used to produce helpful debug messages. 1509 1510 Returns: 1511 The target Expression. 1512 """ 1513 errors = [] 1514 for expression_type in ensure_list(expression_types): 1515 parser = self.EXPRESSION_PARSERS.get(expression_type) 1516 if not parser: 1517 raise TypeError(f"No parser registered for {expression_type}") 1518 1519 try: 1520 return self._parse(parser, raw_tokens, sql) 1521 except ParseError as e: 1522 e.errors[0]["into_expression"] = expression_type 1523 errors.append(e) 1524 1525 raise ParseError( 1526 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1527 errors=merge_errors(errors), 1528 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1568 def check_errors(self) -> None: 1569 """Logs or raises any found errors, depending on the chosen error level setting.""" 1570 if self.error_level == ErrorLevel.WARN: 1571 for error in self.errors: 1572 logger.error(str(error)) 1573 elif self.error_level == ErrorLevel.RAISE and self.errors: 1574 raise ParseError( 1575 concat_messages(self.errors, self.max_errors), 1576 errors=merge_errors(self.errors), 1577 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: The error description.
            token: The token to anchor the error location to; defaults to the
                current token, then the previous one, then an empty token.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        # Slice the surrounding SQL so the message shows up to
        # `error_message_context` characters on each side of the token.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The \033[4m / \033[0m pair underlines the offending token in terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1607 def expression( 1608 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1609 ) -> E: 1610 """ 1611 Creates a new, validated Expression. 1612 1613 Args: 1614 exp_class: The expression class to instantiate. 1615 comments: An optional list of comments to attach to the expression. 1616 kwargs: The arguments to set for the expression along with their respective values. 1617 1618 Returns: 1619 The target expression. 1620 """ 1621 instance = exp_class(**kwargs) 1622 instance.add_comments(comments) if comments else self._add_comments(instance) 1623 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1630 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1631 """ 1632 Validates an Expression, making sure that all its mandatory arguments are set. 1633 1634 Args: 1635 expression: The expression to validate. 1636 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1637 1638 Returns: 1639 The validated expression. 1640 """ 1641 if self.error_level != ErrorLevel.IGNORE: 1642 for error_message in expression.error_messages(args): 1643 self.raise_error(error_message) 1644 1645 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.