sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be folded into LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be folded into Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)
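# Illustrative sketch (not part of the original module): build_var_map (above) pairs
# the alternating key/value arguments of a variadic MAP-style call. The literals
# below are hypothetical inputs, not values taken from this file.
#
# >>> from sqlglot import exp
# >>> node = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
# >>> isinstance(node, exp.VarMap), node.args["keys"].expressions[0].this
# (True, 'a')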
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)
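# Illustrative sketch (not part of the original module): build_mod (above)
# parenthesizes binary operands so operator precedence survives the MOD -> %
# rewrite, per the example in its comment.
#
# >>> import sqlglot
# >>> sqlglot.transpile("SELECT MOD(a + 1, 7)")[0]
# 'SELECT (a + 1) % 7'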
class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
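    # Illustrative sketch (not part of the original module): dialect parsers extend
    # the FUNCTIONS table above by subclassing; MyParser and REVERSE_SUBSTR are
    # hypothetical names used only for illustration.
    #
    # class MyParser(Parser):
    #     FUNCTIONS = {
    #         **Parser.FUNCTIONS,
    #         "REVERSE_SUBSTR": lambda args: exp.Substring(
    #             this=seq_get(args, 1), start=seq_get(args, 0)
    #         ),
    #     }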
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
        TokenType.SINK,
        TokenType.SOURCE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
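    # Illustrative sketch (not part of the original module): because FORMAT is in
    # ID_VAR_TOKENS (and ALIAS_TOKENS is the same set), the keyword can still be
    # parsed as a plain alias.
    #
    # >>> import sqlglot
    # >>> sqlglot.parse_one("SELECT 1 AS format").selects[0].alias
    # 'format'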
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
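    # Illustrative sketch (not part of the original module): the operator tables
    # above (CONJUNCTION, DISJUNCTION, EQUALITY, COMPARISON, BITWISE, TERM, FACTOR)
    # drive a precedence climb, so AND binds tighter than OR.
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> tree = sqlglot.parse_one("a OR b AND c")
    # >>> isinstance(tree, exp.Or), isinstance(tree.expression, exp.And)
    # (True, True)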
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }
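    # Illustrative sketch (not part of the original module): _parse_statement
    # (defined further down) dispatches on the first token via STATEMENT_PARSERS.
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> isinstance(sqlglot.parse_one("USE my_db"), exp.Use)
    # True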
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
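    # Illustrative sketch (not part of the original module): RANGE_PARSERS attach
    # postfix predicates such as BETWEEN and IS to an already-parsed operand.
    #
    # >>> import sqlglot
    # >>> from sqlglot import exp
    # >>> isinstance(sqlglot.parse_one("x BETWEEN 1 AND 10"), exp.Between)
    # True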
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }
"SET": lambda self: self._parse_alter_table_set(), 1084 "SWAP": lambda self: self.expression( 1085 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1086 ), 1087 } 1088 1089 ALTER_ALTER_PARSERS = { 1090 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1091 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1092 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1093 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1094 } 1095 1096 SCHEMA_UNNAMED_CONSTRAINTS = { 1097 "CHECK", 1098 "EXCLUDE", 1099 "FOREIGN KEY", 1100 "LIKE", 1101 "PERIOD", 1102 "PRIMARY KEY", 1103 "UNIQUE", 1104 "WATERMARK", 1105 } 1106 1107 NO_PAREN_FUNCTION_PARSERS = { 1108 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1109 "CASE": lambda self: self._parse_case(), 1110 "CONNECT_BY_ROOT": lambda self: self.expression( 1111 exp.ConnectByRoot, this=self._parse_column() 1112 ), 1113 "IF": lambda self: self._parse_if(), 1114 } 1115 1116 INVALID_FUNC_NAME_TOKENS = { 1117 TokenType.IDENTIFIER, 1118 TokenType.STRING, 1119 } 1120 1121 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1122 1123 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1124 1125 FUNCTION_PARSERS = { 1126 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1127 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1128 "DECODE": lambda self: self._parse_decode(), 1129 "EXTRACT": lambda self: self._parse_extract(), 1130 "GAP_FILL": lambda self: self._parse_gap_fill(), 1131 "JSON_OBJECT": lambda self: self._parse_json_object(), 1132 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1133 "JSON_TABLE": lambda self: self._parse_json_table(), 1134 "MATCH": lambda self: self._parse_match_against(), 1135 "NORMALIZE": lambda self: self._parse_normalize(), 1136 "OPENJSON": lambda self: self._parse_open_json(), 1137 "OVERLAY": lambda self: self._parse_overlay(), 1138 "POSITION": lambda self: self._parse_position(), 1139 "PREDICT": lambda self: self._parse_predict(), 1140 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1141 "STRING_AGG": lambda self: self._parse_string_agg(), 1142 "SUBSTRING": lambda self: self._parse_substring(), 1143 "TRIM": lambda self: self._parse_trim(), 1144 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1145 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1146 "XMLELEMENT": lambda self: self.expression( 1147 exp.XMLElement, 1148 this=self._match_text_seq("NAME") and self._parse_id_var(), 1149 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1150 ), 1151 } 1152 1153 QUERY_MODIFIER_PARSERS = { 1154 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1155 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1156 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1157 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1158 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1159 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1160 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1161 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1162 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1163 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1164 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1165 TokenType.FOR: lambda self: ("locks", 
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
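    # Illustrative sketch (not part of the original module): INSERT_ALTERNATIVES
    # feeds the `INSERT OR <action>` form; the table name t is hypothetical.
    #
    # >>> import sqlglot
    # >>> tree = sqlglot.parse_one("INSERT OR IGNORE INTO t VALUES (1)", read="sqlite")
    # >>> tree.args["alternative"]
    # 'IGNORE'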
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
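    # Illustrative sketch (not part of the original module): parse (above) splits
    # the token stream on semicolons and returns one tree per statement.
    #
    # >>> import sqlglot
    # >>> len(sqlglot.parse("SELECT 1; SELECT 2"))
    # 2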
1461 """ 1462 errors = [] 1463 for expression_type in ensure_list(expression_types): 1464 parser = self.EXPRESSION_PARSERS.get(expression_type) 1465 if not parser: 1466 raise TypeError(f"No parser registered for {expression_type}") 1467 1468 try: 1469 return self._parse(parser, raw_tokens, sql) 1470 except ParseError as e: 1471 e.errors[0]["into_expression"] = expression_type 1472 errors.append(e) 1473 1474 raise ParseError( 1475 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1476 errors=merge_errors(errors), 1477 ) from errors[-1] 1478 1479 def _parse( 1480 self, 1481 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1482 raw_tokens: t.List[Token], 1483 sql: t.Optional[str] = None, 1484 ) -> t.List[t.Optional[exp.Expression]]: 1485 self.reset() 1486 self.sql = sql or "" 1487 1488 total = len(raw_tokens) 1489 chunks: t.List[t.List[Token]] = [[]] 1490 1491 for i, token in enumerate(raw_tokens): 1492 if token.token_type == TokenType.SEMICOLON: 1493 if token.comments: 1494 chunks.append([token]) 1495 1496 if i < total - 1: 1497 chunks.append([]) 1498 else: 1499 chunks[-1].append(token) 1500 1501 expressions = [] 1502 1503 for tokens in chunks: 1504 self._index = -1 1505 self._tokens = tokens 1506 self._advance() 1507 1508 expressions.append(parse_method(self)) 1509 1510 if self._index < len(self._tokens): 1511 self.raise_error("Invalid expression / Unexpected token") 1512 1513 self.check_errors() 1514 1515 return expressions 1516 1517 def check_errors(self) -> None: 1518 """Logs or raises any found errors, depending on the chosen error level setting.""" 1519 if self.error_level == ErrorLevel.WARN: 1520 for error in self.errors: 1521 logger.error(str(error)) 1522 elif self.error_level == ErrorLevel.RAISE and self.errors: 1523 raise ParseError( 1524 concat_messages(self.errors, self.max_errors), 1525 errors=merge_errors(self.errors), 1526 ) 1527 1528 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1529 """ 1530 Appends an error in the list of recorded errors or raises it, depending on the chosen 1531 error level setting. 1532 """ 1533 token = token or self._curr or self._prev or Token.string("") 1534 start = token.start 1535 end = token.end + 1 1536 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1537 highlight = self.sql[start:end] 1538 end_context = self.sql[end : end + self.error_message_context] 1539 1540 error = ParseError.new( 1541 f"{message}. Line {token.line}, Col: {token.col}.\n" 1542 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1543 description=message, 1544 line=token.line, 1545 col=token.col, 1546 start_context=start_context, 1547 highlight=highlight, 1548 end_context=end_context, 1549 ) 1550 1551 if self.error_level == ErrorLevel.IMMEDIATE: 1552 raise error 1553 1554 self.errors.append(error) 1555 1556 def expression( 1557 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1558 ) -> E: 1559 """ 1560 Creates a new, validated Expression. 1561 1562 Args: 1563 exp_class: The expression class to instantiate. 1564 comments: An optional list of comments to attach to the expression. 1565 kwargs: The arguments to set for the expression along with their respective values. 1566 1567 Returns: 1568 The target expression. 
1569 """ 1570 instance = exp_class(**kwargs) 1571 instance.add_comments(comments) if comments else self._add_comments(instance) 1572 return self.validate_expression(instance) 1573 1574 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1575 if expression and self._prev_comments: 1576 expression.add_comments(self._prev_comments) 1577 self._prev_comments = None 1578 1579 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1580 """ 1581 Validates an Expression, making sure that all its mandatory arguments are set. 1582 1583 Args: 1584 expression: The expression to validate. 1585 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1586 1587 Returns: 1588 The validated expression. 1589 """ 1590 if self.error_level != ErrorLevel.IGNORE: 1591 for error_message in expression.error_messages(args): 1592 self.raise_error(error_message) 1593 1594 return expression 1595 1596 def _find_sql(self, start: Token, end: Token) -> str: 1597 return self.sql[start.start : end.end + 1] 1598 1599 def _is_connected(self) -> bool: 1600 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1601 1602 def _advance(self, times: int = 1) -> None: 1603 self._index += times 1604 self._curr = seq_get(self._tokens, self._index) 1605 self._next = seq_get(self._tokens, self._index + 1) 1606 1607 if self._index > 0: 1608 self._prev = self._tokens[self._index - 1] 1609 self._prev_comments = self._prev.comments 1610 else: 1611 self._prev = None 1612 self._prev_comments = None 1613 1614 def _retreat(self, index: int) -> None: 1615 if index != self._index: 1616 self._advance(index - self._index) 1617 1618 def _warn_unsupported(self) -> None: 1619 if len(self._tokens) <= 1: 1620 return 1621 1622 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1623 # interested in emitting a warning for the one being currently processed. 1624 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1625 1626 logger.warning( 1627 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1628 ) 1629 1630 def _parse_command(self) -> exp.Command: 1631 self._warn_unsupported() 1632 return self.expression( 1633 exp.Command, 1634 comments=self._prev_comments, 1635 this=self._prev.text.upper(), 1636 expression=self._parse_string(), 1637 ) 1638 1639 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1640 """ 1641 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1642 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1643 solve this by setting & resetting the parser state accordingly 1644 """ 1645 index = self._index 1646 error_level = self.error_level 1647 1648 self.error_level = ErrorLevel.IMMEDIATE 1649 try: 1650 this = parse_method() 1651 except ParseError: 1652 this = None 1653 finally: 1654 if not this or retreat: 1655 self._retreat(index) 1656 self.error_level = error_level 1657 1658 return this 1659 1660 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1661 start = self._prev 1662 exists = self._parse_exists() if allow_exists else None 1663 1664 self._match(TokenType.ON) 1665 1666 materialized = self._match_text_seq("MATERIALIZED") 1667 kind = self._match_set(self.CREATABLES) and self._prev 1668 if not kind: 1669 return self._parse_as_command(start) 1670 1671 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1672 this = self._parse_user_defined_function(kind=kind.token_type) 1673 elif kind.token_type == TokenType.TABLE: 1674 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1675 elif kind.token_type == TokenType.COLUMN: 1676 this = self._parse_column() 1677 else: 1678 this = self._parse_id_var() 1679 1680 self._match(TokenType.IS) 1681 1682 return self.expression( 1683 exp.Comment, 1684 this=this, 1685 kind=kind.text, 1686 expression=self._parse_string(), 1687 exists=exists, 1688 materialized=materialized, 1689 ) 1690 1691 def _parse_to_table( 1692 self, 1693 ) -> exp.ToTableProperty: 1694 table = self._parse_table_parts(schema=True) 1695 return self.expression(exp.ToTableProperty, this=table) 1696 1697 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1698 def _parse_ttl(self) -> exp.Expression: 1699 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1700 this = self._parse_bitwise() 1701 1702 if self._match_text_seq("DELETE"): 1703 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1704 if self._match_text_seq("RECOMPRESS"): 1705 return self.expression( 1706 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1707 ) 1708 if self._match_text_seq("TO", "DISK"): 1709 return self.expression( 1710 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1711 ) 1712 if self._match_text_seq("TO", "VOLUME"): 1713 return self.expression( 1714 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1715 ) 1716 1717 return this 1718 1719 expressions = self._parse_csv(_parse_ttl_action) 1720 where = self._parse_where() 1721 group = self._parse_group() 1722 1723 aggregates = None 1724 if group and self._match(TokenType.SET): 1725 aggregates = self._parse_csv(self._parse_set_item) 1726 1727 return self.expression( 1728 exp.MergeTreeTTL, 1729 expressions=expressions, 1730 where=where, 1731 group=group, 1732 aggregates=aggregates, 1733 ) 1734 1735 def _parse_statement(self) -> t.Optional[exp.Expression]: 1736 if self._curr is None: 1737 return None 1738 1739 if self._match_set(self.STATEMENT_PARSERS): 1740 comments = self._prev_comments 1741 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1742 stmt.add_comments(comments, prepend=True) 1743 return stmt 1744 1745 if self._match_set(self.dialect.tokenizer.COMMANDS): 1746 return self._parse_command() 1747 1748 expression = self._parse_expression() 1749 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1750 return self._parse_query_modifiers(expression) 
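
    # Example (illustrative sketch, not part of the original source): these
    # internals are normally driven through sqlglot's public helpers, e.g.
    #
    #   import sqlglot
    #
    #   # _parse() splits the token stream on semicolons, so this returns a
    #   # list with two expression trees:
    #   expressions = sqlglot.parse("SELECT 1; SELECT 2")
    #
    #   # A single statement is dispatched by _parse_statement() through
    #   # STATEMENT_PARSERS (SELECT, CREATE, DROP, ...):
    #   tree = sqlglot.parse_one("CREATE TABLE t (c INT)")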

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
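            # e.g. in "CREATE FUNCTION f(x INT) RETURNS INT LANGUAGE SQL AS ...",
            # the RETURNS and LANGUAGE clauses are collected here as properties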
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for Teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to an exp.Dot if it's a dotted identifier wrapped in exp.Column,
        # or to an exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
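        # The three Postgres bound forms map onto the branches below:
        #   FOR VALUES IN (1, 2)                     -> this
        #   FOR VALUES FROM (MINVALUE) TO (10)       -> from_expressions / to_expressions
        #   FOR VALUES WITH (MODULUS 4, REMAINDER 0) -> this / expression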
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()
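
        # e.g. "DELETE t1, t2 FROM t1 JOIN t2 ..." fills `tables`, whereas the
        # plain "DELETE FROM t WHERE ..." form leaves it as None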
        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
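        # Each bare expression is wrapped in a single-element Tuple below, so both
        # the parenthesized and unparenthesized forms produce the same shape.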
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # DuckDB supports a leading FROM clause, e.g. FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

            # Transform exp.Values into an exp.Table to pass through parse_query_modifiers,
            # in case a modifier (e.g. a join) follows
            if table and isinstance(this, exp.Values) and this.alias:
                alias = this.args["alias"].pop()
                this = exp.Table(this=this, alias=alias)

            this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            expressions.append(self._parse_cte())
            if last_comments:
                expressions[-1].add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
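            # e.g. in "FROM tbl AS t /* c */", the comment initially attaches to
            # the alias Identifier and is re-parented here so it isn't lost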
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )
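
        # All MATCH_RECOGNIZE sub-clauses (PARTITION BY, ORDER BY, MEASURES,
        # rows-per-match, AFTER MATCH SKIP, PATTERN, DEFINE) have been consumed;
        # only the closing paren and an optional table alias remain.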
self._match_r_paren() 3399 3400 return self.expression( 3401 exp.MatchRecognize, 3402 partition_by=partition, 3403 order=order, 3404 measures=measures, 3405 rows=rows, 3406 after=after, 3407 pattern=pattern, 3408 define=define, 3409 alias=self._parse_table_alias(), 3410 ) 3411 3412 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3413 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3414 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3415 cross_apply = False 3416 3417 if cross_apply is not None: 3418 this = self._parse_select(table=True) 3419 view = None 3420 outer = None 3421 elif self._match(TokenType.LATERAL): 3422 this = self._parse_select(table=True) 3423 view = self._match(TokenType.VIEW) 3424 outer = self._match(TokenType.OUTER) 3425 else: 3426 return None 3427 3428 if not this: 3429 this = ( 3430 self._parse_unnest() 3431 or self._parse_function() 3432 or self._parse_id_var(any_token=False) 3433 ) 3434 3435 while self._match(TokenType.DOT): 3436 this = exp.Dot( 3437 this=this, 3438 expression=self._parse_function() or self._parse_id_var(any_token=False), 3439 ) 3440 3441 if view: 3442 table = self._parse_id_var(any_token=False) 3443 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3444 table_alias: t.Optional[exp.TableAlias] = self.expression( 3445 exp.TableAlias, this=table, columns=columns 3446 ) 3447 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3448 # We move the alias from the lateral's child node to the lateral itself 3449 table_alias = this.args["alias"].pop() 3450 else: 3451 table_alias = self._parse_table_alias() 3452 3453 return self.expression( 3454 exp.Lateral, 3455 this=this, 3456 view=view, 3457 outer=outer, 3458 alias=table_alias, 3459 cross_apply=cross_apply, 3460 ) 3461 3462 def _parse_join_parts( 3463 self, 3464 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3465 return ( 3466 self._match_set(self.JOIN_METHODS) and self._prev, 3467 self._match_set(self.JOIN_SIDES) and self._prev, 3468 self._match_set(self.JOIN_KINDS) and self._prev, 3469 ) 3470 3471 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3472 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3473 this = self._parse_column() 3474 if isinstance(this, exp.Column): 3475 return this.this 3476 return this 3477 3478 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3479 3480 def _parse_join( 3481 self, skip_join_token: bool = False, parse_bracket: bool = False 3482 ) -> t.Optional[exp.Join]: 3483 if self._match(TokenType.COMMA): 3484 return self.expression(exp.Join, this=self._parse_table()) 3485 3486 index = self._index 3487 method, side, kind = self._parse_join_parts() 3488 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3489 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3490 3491 if not skip_join_token and not join: 3492 self._retreat(index) 3493 kind = None 3494 method = None 3495 side = None 3496 3497 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3498 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3499 3500 if not skip_join_token and not join and not outer_apply and not cross_apply: 3501 return None 3502 3503 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3504 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3505 kwargs["expressions"] = 
self._parse_csv( 3506 lambda: self._parse_table(parse_bracket=parse_bracket) 3507 ) 3508 3509 if method: 3510 kwargs["method"] = method.text 3511 if side: 3512 kwargs["side"] = side.text 3513 if kind: 3514 kwargs["kind"] = kind.text 3515 if hint: 3516 kwargs["hint"] = hint 3517 3518 if self._match(TokenType.MATCH_CONDITION): 3519 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3520 3521 if self._match(TokenType.ON): 3522 kwargs["on"] = self._parse_assignment() 3523 elif self._match(TokenType.USING): 3524 kwargs["using"] = self._parse_using_identifiers() 3525 elif ( 3526 not (outer_apply or cross_apply) 3527 and not isinstance(kwargs["this"], exp.Unnest) 3528 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3529 ): 3530 index = self._index 3531 joins: t.Optional[list] = list(self._parse_joins()) 3532 3533 if joins and self._match(TokenType.ON): 3534 kwargs["on"] = self._parse_assignment() 3535 elif joins and self._match(TokenType.USING): 3536 kwargs["using"] = self._parse_using_identifiers() 3537 else: 3538 joins = None 3539 self._retreat(index) 3540 3541 kwargs["this"].set("joins", joins if joins else None) 3542 3543 comments = [c for token in (method, side, kind) if token for c in token.comments] 3544 return self.expression(exp.Join, comments=comments, **kwargs) 3545 3546 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3547 this = self._parse_assignment() 3548 3549 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3550 return this 3551 3552 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3553 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3554 3555 return this 3556 3557 def _parse_index_params(self) -> exp.IndexParameters: 3558 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3559 3560 if self._match(TokenType.L_PAREN, advance=False): 3561 columns = self._parse_wrapped_csv(self._parse_with_operator) 3562 else: 3563 columns = None 3564 3565 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3566 partition_by = self._parse_partition_by() 3567 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3568 tablespace = ( 3569 self._parse_var(any_token=True) 3570 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3571 else None 3572 ) 3573 where = self._parse_where() 3574 3575 on = self._parse_field() if self._match(TokenType.ON) else None 3576 3577 return self.expression( 3578 exp.IndexParameters, 3579 using=using, 3580 columns=columns, 3581 include=include, 3582 partition_by=partition_by, 3583 where=where, 3584 with_storage=with_storage, 3585 tablespace=tablespace, 3586 on=on, 3587 ) 3588 3589 def _parse_index( 3590 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3591 ) -> t.Optional[exp.Index]: 3592 if index or anonymous: 3593 unique = None 3594 primary = None 3595 amp = None 3596 3597 self._match(TokenType.ON) 3598 self._match(TokenType.TABLE) # hive 3599 table = self._parse_table_parts(schema=True) 3600 else: 3601 unique = self._match(TokenType.UNIQUE) 3602 primary = self._match_text_seq("PRIMARY") 3603 amp = self._match_text_seq("AMP") 3604 3605 if not self._match(TokenType.INDEX): 3606 return None 3607 3608 index = self._parse_id_var() 3609 table = None 3610 3611 params = self._parse_index_params() 3612 3613 return self.expression( 3614 exp.Index, 3615 this=index, 3616 table=table, 3617 unique=unique, 3618 primary=primary, 3619 amp=amp, 3620 
params=params, 3621 ) 3622 3623 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3624 hints: t.List[exp.Expression] = [] 3625 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3626 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3627 hints.append( 3628 self.expression( 3629 exp.WithTableHint, 3630 expressions=self._parse_csv( 3631 lambda: self._parse_function() or self._parse_var(any_token=True) 3632 ), 3633 ) 3634 ) 3635 self._match_r_paren() 3636 else: 3637 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3638 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3639 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3640 3641 self._match_set((TokenType.INDEX, TokenType.KEY)) 3642 if self._match(TokenType.FOR): 3643 hint.set("target", self._advance_any() and self._prev.text.upper()) 3644 3645 hint.set("expressions", self._parse_wrapped_id_vars()) 3646 hints.append(hint) 3647 3648 return hints or None 3649 3650 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3651 return ( 3652 (not schema and self._parse_function(optional_parens=False)) 3653 or self._parse_id_var(any_token=False) 3654 or self._parse_string_as_identifier() 3655 or self._parse_placeholder() 3656 ) 3657 3658 def _parse_table_parts( 3659 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3660 ) -> exp.Table: 3661 catalog = None 3662 db = None 3663 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3664 3665 while self._match(TokenType.DOT): 3666 if catalog: 3667 # This allows nesting the table in arbitrarily many dot expressions if needed 3668 table = self.expression( 3669 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3670 ) 3671 else: 3672 catalog = db 3673 db = table 3674 # "" used for tsql FROM a..b case 3675 table = self._parse_table_part(schema=schema) or "" 3676 3677 if ( 3678 wildcard 3679 and self._is_connected() 3680 and (isinstance(table, exp.Identifier) or not table) 3681 and self._match(TokenType.STAR) 3682 ): 3683 if isinstance(table, exp.Identifier): 3684 table.args["this"] += "*" 3685 else: 3686 table = exp.Identifier(this="*") 3687 3688 # We bubble up comments from the Identifier to the Table 3689 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3690 3691 if is_db_reference: 3692 catalog = db 3693 db = table 3694 table = None 3695 3696 if not table and not is_db_reference: 3697 self.raise_error(f"Expected table name but got {self._curr}") 3698 if not db and is_db_reference: 3699 self.raise_error(f"Expected database name but got {self._curr}") 3700 3701 table = self.expression( 3702 exp.Table, 3703 comments=comments, 3704 this=table, 3705 db=db, 3706 catalog=catalog, 3707 ) 3708 3709 changes = self._parse_changes() 3710 if changes: 3711 table.set("changes", changes) 3712 3713 at_before = self._parse_historical_data() 3714 if at_before: 3715 table.set("when", at_before) 3716 3717 pivots = self._parse_pivots() 3718 if pivots: 3719 table.set("pivots", pivots) 3720 3721 return table 3722 3723 def _parse_table( 3724 self, 3725 schema: bool = False, 3726 joins: bool = False, 3727 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3728 parse_bracket: bool = False, 3729 is_db_reference: bool = False, 3730 parse_partition: bool = False, 3731 ) -> t.Optional[exp.Expression]: 3732 lateral = self._parse_lateral() 3733 if lateral: 3734 return lateral 3735 3736 unnest = 
self._parse_unnest() 3737 if unnest: 3738 return unnest 3739 3740 values = self._parse_derived_table_values() 3741 if values: 3742 return values 3743 3744 subquery = self._parse_select(table=True) 3745 if subquery: 3746 if not subquery.args.get("pivots"): 3747 subquery.set("pivots", self._parse_pivots()) 3748 return subquery 3749 3750 bracket = parse_bracket and self._parse_bracket(None) 3751 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3752 3753 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3754 self._parse_table 3755 ) 3756 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3757 3758 only = self._match(TokenType.ONLY) 3759 3760 this = t.cast( 3761 exp.Expression, 3762 bracket 3763 or rows_from 3764 or self._parse_bracket( 3765 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3766 ), 3767 ) 3768 3769 if only: 3770 this.set("only", only) 3771 3772 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3773 self._match_text_seq("*") 3774 3775 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3776 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3777 this.set("partition", self._parse_partition()) 3778 3779 if schema: 3780 return self._parse_schema(this=this) 3781 3782 version = self._parse_version() 3783 3784 if version: 3785 this.set("version", version) 3786 3787 if self.dialect.ALIAS_POST_TABLESAMPLE: 3788 this.set("sample", self._parse_table_sample()) 3789 3790 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3791 if alias: 3792 this.set("alias", alias) 3793 3794 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3795 return self.expression( 3796 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3797 ) 3798 3799 this.set("hints", self._parse_table_hints()) 3800 3801 if not this.args.get("pivots"): 3802 this.set("pivots", self._parse_pivots()) 3803 3804 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3805 this.set("sample", self._parse_table_sample()) 3806 3807 if joins: 3808 for join in self._parse_joins(): 3809 this.append("joins", join) 3810 3811 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3812 this.set("ordinality", True) 3813 this.set("alias", self._parse_table_alias()) 3814 3815 return this 3816 3817 def _parse_version(self) -> t.Optional[exp.Version]: 3818 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3819 this = "TIMESTAMP" 3820 elif self._match(TokenType.VERSION_SNAPSHOT): 3821 this = "VERSION" 3822 else: 3823 return None 3824 3825 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3826 kind = self._prev.text.upper() 3827 start = self._parse_bitwise() 3828 self._match_texts(("TO", "AND")) 3829 end = self._parse_bitwise() 3830 expression: t.Optional[exp.Expression] = self.expression( 3831 exp.Tuple, expressions=[start, end] 3832 ) 3833 elif self._match_text_seq("CONTAINED", "IN"): 3834 kind = "CONTAINED IN" 3835 expression = self.expression( 3836 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3837 ) 3838 elif self._match(TokenType.ALL): 3839 kind = "ALL" 3840 expression = None 3841 else: 3842 self._match_text_seq("AS", "OF") 3843 kind = "AS OF" 3844 expression = self._parse_type() 3845 3846 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3847 3848 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3849 # 
https://docs.snowflake.com/en/sql-reference/constructs/at-before 3850 index = self._index 3851 historical_data = None 3852 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3853 this = self._prev.text.upper() 3854 kind = ( 3855 self._match(TokenType.L_PAREN) 3856 and self._match_texts(self.HISTORICAL_DATA_KIND) 3857 and self._prev.text.upper() 3858 ) 3859 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3860 3861 if expression: 3862 self._match_r_paren() 3863 historical_data = self.expression( 3864 exp.HistoricalData, this=this, kind=kind, expression=expression 3865 ) 3866 else: 3867 self._retreat(index) 3868 3869 return historical_data 3870 3871 def _parse_changes(self) -> t.Optional[exp.Changes]: 3872 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3873 return None 3874 3875 information = self._parse_var(any_token=True) 3876 self._match_r_paren() 3877 3878 return self.expression( 3879 exp.Changes, 3880 information=information, 3881 at_before=self._parse_historical_data(), 3882 end=self._parse_historical_data(), 3883 ) 3884 3885 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3886 if not self._match(TokenType.UNNEST): 3887 return None 3888 3889 expressions = self._parse_wrapped_csv(self._parse_equality) 3890 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3891 3892 alias = self._parse_table_alias() if with_alias else None 3893 3894 if alias: 3895 if self.dialect.UNNEST_COLUMN_ONLY: 3896 if alias.args.get("columns"): 3897 self.raise_error("Unexpected extra column alias in unnest.") 3898 3899 alias.set("columns", [alias.this]) 3900 alias.set("this", None) 3901 3902 columns = alias.args.get("columns") or [] 3903 if offset and len(expressions) < len(columns): 3904 offset = columns.pop() 3905 3906 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3907 self._match(TokenType.ALIAS) 3908 offset = self._parse_id_var( 3909 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3910 ) or exp.to_identifier("offset") 3911 3912 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3913 3914 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3915 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3916 if not is_derived and not ( 3917 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3918 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3919 ): 3920 return None 3921 3922 expressions = self._parse_csv(self._parse_value) 3923 alias = self._parse_table_alias() 3924 3925 if is_derived: 3926 self._match_r_paren() 3927 3928 return self.expression( 3929 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3930 ) 3931 3932 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3933 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3934 as_modifier and self._match_text_seq("USING", "SAMPLE") 3935 ): 3936 return None 3937 3938 bucket_numerator = None 3939 bucket_denominator = None 3940 bucket_field = None 3941 percent = None 3942 size = None 3943 seed = None 3944 3945 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3946 matched_l_paren = self._match(TokenType.L_PAREN) 3947 3948 if self.TABLESAMPLE_CSV: 3949 num = None 3950 expressions = self._parse_csv(self._parse_primary) 3951 else: 3952 expressions = None 3953 num = ( 3954 self._parse_factor() 3955 if self._match(TokenType.NUMBER, advance=False) 3956 else self._parse_primary() or 
self._parse_placeholder()
3957            )
3958
3959        if self._match_text_seq("BUCKET"):
3960            bucket_numerator = self._parse_number()
3961            self._match_text_seq("OUT", "OF")
3962            bucket_denominator = self._parse_number()
3963            self._match(TokenType.ON)
3964            bucket_field = self._parse_field()
3965        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
3966            percent = num
3967        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
3968            size = num
3969        else:
3970            percent = num
3971
3972        if matched_l_paren:
3973            self._match_r_paren()
3974
3975        if self._match(TokenType.L_PAREN):
3976            method = self._parse_var(upper=True)
3977            seed = self._match(TokenType.COMMA) and self._parse_number()
3978            self._match_r_paren()
3979        elif self._match_texts(("SEED", "REPEATABLE")):
3980            seed = self._parse_wrapped(self._parse_number)
3981
3982        if not method and self.DEFAULT_SAMPLING_METHOD:
3983            method = exp.var(self.DEFAULT_SAMPLING_METHOD)
3984
3985        return self.expression(
3986            exp.TableSample,
3987            expressions=expressions,
3988            method=method,
3989            bucket_numerator=bucket_numerator,
3990            bucket_denominator=bucket_denominator,
3991            bucket_field=bucket_field,
3992            percent=percent,
3993            size=size,
3994            seed=seed,
3995        )
3996
3997    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
3998        return list(iter(self._parse_pivot, None)) or None
3999
4000    def _parse_joins(self) -> t.Iterator[exp.Join]:
4001        return iter(self._parse_join, None)
4002
4003    # https://duckdb.org/docs/sql/statements/pivot
4004    def _parse_simplified_pivot(self) -> exp.Pivot:
4005        def _parse_on() -> t.Optional[exp.Expression]:
4006            this = self._parse_bitwise()
4007            return self._parse_in(this) if self._match(TokenType.IN) else this
4008
4009        this = self._parse_table()
4010        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
4011        using = self._match(TokenType.USING) and self._parse_csv(
4012            lambda: self._parse_alias(self._parse_function())
4013        )
4014        group = self._parse_group()
4015        return self.expression(
4016            exp.Pivot, this=this, expressions=expressions, using=using, group=group
4017        )
4018
4019    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
4020        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
4021            this = self._parse_select_or_expression()
4022
4023            self._match(TokenType.ALIAS)
4024            alias = self._parse_bitwise()
4025            if alias:
4026                if isinstance(alias, exp.Column) and not alias.db:
4027                    alias = alias.this
4028                return self.expression(exp.PivotAlias, this=this, alias=alias)
4029
4030            return this
4031
4032        value = self._parse_column()
4033
4034        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
4035            self.raise_error("Expecting IN (")
4036
4037        if self._match(TokenType.ANY):
4038            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
4039        else:
4040            exprs = self._parse_csv(_parse_aliased_expression)
4041
4042        self._match_r_paren()
4043        return self.expression(exp.In, this=value, expressions=exprs)
4044
4045    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
4046        index = self._index
4047        include_nulls = None
4048
4049        if self._match(TokenType.PIVOT):
4050            unpivot = False
4051        elif self._match(TokenType.UNPIVOT):
4052            unpivot = True
4053
4054            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
4055            if self._match_text_seq("INCLUDE", "NULLS"):
4056                include_nulls = True
4057            elif self._match_text_seq("EXCLUDE", "NULLS"):
4058                include_nulls = False
4059        else:
4060            return None
4061
4062        expressions
= [] 4063 4064 if not self._match(TokenType.L_PAREN): 4065 self._retreat(index) 4066 return None 4067 4068 if unpivot: 4069 expressions = self._parse_csv(self._parse_column) 4070 else: 4071 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4072 4073 if not expressions: 4074 self.raise_error("Failed to parse PIVOT's aggregation list") 4075 4076 if not self._match(TokenType.FOR): 4077 self.raise_error("Expecting FOR") 4078 4079 field = self._parse_pivot_in() 4080 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4081 self._parse_bitwise 4082 ) 4083 4084 self._match_r_paren() 4085 4086 pivot = self.expression( 4087 exp.Pivot, 4088 expressions=expressions, 4089 field=field, 4090 unpivot=unpivot, 4091 include_nulls=include_nulls, 4092 default_on_null=default_on_null, 4093 ) 4094 4095 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4096 pivot.set("alias", self._parse_table_alias()) 4097 4098 if not unpivot: 4099 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4100 4101 columns: t.List[exp.Expression] = [] 4102 for fld in pivot.args["field"].expressions: 4103 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4104 for name in names: 4105 if self.PREFIXED_PIVOT_COLUMNS: 4106 name = f"{name}_{field_name}" if name else field_name 4107 else: 4108 name = f"{field_name}_{name}" if name else field_name 4109 4110 columns.append(exp.to_identifier(name)) 4111 4112 pivot.set("columns", columns) 4113 4114 return pivot 4115 4116 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4117 return [agg.alias for agg in aggregations] 4118 4119 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4120 if not skip_where_token and not self._match(TokenType.PREWHERE): 4121 return None 4122 4123 return self.expression( 4124 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4125 ) 4126 4127 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4128 if not skip_where_token and not self._match(TokenType.WHERE): 4129 return None 4130 4131 return self.expression( 4132 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4133 ) 4134 4135 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4136 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4137 return None 4138 4139 elements: t.Dict[str, t.Any] = defaultdict(list) 4140 4141 if self._match(TokenType.ALL): 4142 elements["all"] = True 4143 elif self._match(TokenType.DISTINCT): 4144 elements["all"] = False 4145 4146 while True: 4147 index = self._index 4148 4149 elements["expressions"].extend( 4150 self._parse_csv( 4151 lambda: None 4152 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4153 else self._parse_assignment() 4154 ) 4155 ) 4156 4157 before_with_index = self._index 4158 with_prefix = self._match(TokenType.WITH) 4159 4160 if self._match(TokenType.ROLLUP): 4161 elements["rollup"].append( 4162 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4163 ) 4164 elif self._match(TokenType.CUBE): 4165 elements["cube"].append( 4166 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4167 ) 4168 elif self._match(TokenType.GROUPING_SETS): 4169 elements["grouping_sets"].append( 4170 self.expression( 4171 exp.GroupingSets, 4172 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4173 ) 4174 ) 
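                # Shapes accepted by this loop, as an illustrative sketch:
                #   GROUP BY a, b
                #   GROUP BY ALL
                #   GROUP BY a WITH ROLLUP             -- prefix form, e.g. MySQL
                #   GROUP BY ROLLUP (a, b), CUBE (c)
                #   GROUP BY GROUPING SETS ((a), (a, b), ())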
4175 elif self._match_text_seq("TOTALS"): 4176 elements["totals"] = True # type: ignore 4177 4178 if before_with_index <= self._index <= before_with_index + 1: 4179 self._retreat(before_with_index) 4180 break 4181 4182 if index == self._index: 4183 break 4184 4185 return self.expression(exp.Group, **elements) # type: ignore 4186 4187 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4188 return self.expression( 4189 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4190 ) 4191 4192 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4193 if self._match(TokenType.L_PAREN): 4194 grouping_set = self._parse_csv(self._parse_column) 4195 self._match_r_paren() 4196 return self.expression(exp.Tuple, expressions=grouping_set) 4197 4198 return self._parse_column() 4199 4200 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4201 if not skip_having_token and not self._match(TokenType.HAVING): 4202 return None 4203 return self.expression(exp.Having, this=self._parse_assignment()) 4204 4205 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4206 if not self._match(TokenType.QUALIFY): 4207 return None 4208 return self.expression(exp.Qualify, this=self._parse_assignment()) 4209 4210 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4211 if skip_start_token: 4212 start = None 4213 elif self._match(TokenType.START_WITH): 4214 start = self._parse_assignment() 4215 else: 4216 return None 4217 4218 self._match(TokenType.CONNECT_BY) 4219 nocycle = self._match_text_seq("NOCYCLE") 4220 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4221 exp.Prior, this=self._parse_bitwise() 4222 ) 4223 connect = self._parse_assignment() 4224 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4225 4226 if not start and self._match(TokenType.START_WITH): 4227 start = self._parse_assignment() 4228 4229 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4230 4231 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4232 this = self._parse_id_var(any_token=True) 4233 if self._match(TokenType.ALIAS): 4234 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4235 return this 4236 4237 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4238 if self._match_text_seq("INTERPOLATE"): 4239 return self._parse_wrapped_csv(self._parse_name_as_expression) 4240 return None 4241 4242 def _parse_order( 4243 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4244 ) -> t.Optional[exp.Expression]: 4245 siblings = None 4246 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4247 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4248 return this 4249 4250 siblings = True 4251 4252 return self.expression( 4253 exp.Order, 4254 this=this, 4255 expressions=self._parse_csv(self._parse_ordered), 4256 siblings=siblings, 4257 ) 4258 4259 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4260 if not self._match(token): 4261 return None 4262 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4263 4264 def _parse_ordered( 4265 self, parse_method: t.Optional[t.Callable] = None 4266 ) -> t.Optional[exp.Ordered]: 4267 this = parse_method() if parse_method else self._parse_assignment() 4268 if not this: 4269 return None 4270 4271 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4272 this = 
exp.var("ALL") 4273 4274 asc = self._match(TokenType.ASC) 4275 desc = self._match(TokenType.DESC) or (asc and False) 4276 4277 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4278 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4279 4280 nulls_first = is_nulls_first or False 4281 explicitly_null_ordered = is_nulls_first or is_nulls_last 4282 4283 if ( 4284 not explicitly_null_ordered 4285 and ( 4286 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4287 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4288 ) 4289 and self.dialect.NULL_ORDERING != "nulls_are_last" 4290 ): 4291 nulls_first = True 4292 4293 if self._match_text_seq("WITH", "FILL"): 4294 with_fill = self.expression( 4295 exp.WithFill, 4296 **{ # type: ignore 4297 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4298 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4299 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4300 "interpolate": self._parse_interpolate(), 4301 }, 4302 ) 4303 else: 4304 with_fill = None 4305 4306 return self.expression( 4307 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4308 ) 4309 4310 def _parse_limit( 4311 self, 4312 this: t.Optional[exp.Expression] = None, 4313 top: bool = False, 4314 skip_limit_token: bool = False, 4315 ) -> t.Optional[exp.Expression]: 4316 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4317 comments = self._prev_comments 4318 if top: 4319 limit_paren = self._match(TokenType.L_PAREN) 4320 expression = self._parse_term() if limit_paren else self._parse_number() 4321 4322 if limit_paren: 4323 self._match_r_paren() 4324 else: 4325 expression = self._parse_term() 4326 4327 if self._match(TokenType.COMMA): 4328 offset = expression 4329 expression = self._parse_term() 4330 else: 4331 offset = None 4332 4333 limit_exp = self.expression( 4334 exp.Limit, 4335 this=this, 4336 expression=expression, 4337 offset=offset, 4338 comments=comments, 4339 expressions=self._parse_limit_by(), 4340 ) 4341 4342 return limit_exp 4343 4344 if self._match(TokenType.FETCH): 4345 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4346 direction = self._prev.text.upper() if direction else "FIRST" 4347 4348 count = self._parse_field(tokens=self.FETCH_TOKENS) 4349 percent = self._match(TokenType.PERCENT) 4350 4351 self._match_set((TokenType.ROW, TokenType.ROWS)) 4352 4353 only = self._match_text_seq("ONLY") 4354 with_ties = self._match_text_seq("WITH", "TIES") 4355 4356 if only and with_ties: 4357 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4358 4359 return self.expression( 4360 exp.Fetch, 4361 direction=direction, 4362 count=count, 4363 percent=percent, 4364 with_ties=with_ties, 4365 ) 4366 4367 return this 4368 4369 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4370 if not self._match(TokenType.OFFSET): 4371 return this 4372 4373 count = self._parse_term() 4374 self._match_set((TokenType.ROW, TokenType.ROWS)) 4375 4376 return self.expression( 4377 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4378 ) 4379 4380 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4381 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4382 4383 def _parse_locks(self) -> t.List[exp.Lock]: 4384 locks = [] 4385 while True: 4386 if self._match_text_seq("FOR", "UPDATE"): 4387 update = True 4388 elif self._match_text_seq("FOR", "SHARE") 
or self._match_text_seq( 4389 "LOCK", "IN", "SHARE", "MODE" 4390 ): 4391 update = False 4392 else: 4393 break 4394 4395 expressions = None 4396 if self._match_text_seq("OF"): 4397 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4398 4399 wait: t.Optional[bool | exp.Expression] = None 4400 if self._match_text_seq("NOWAIT"): 4401 wait = True 4402 elif self._match_text_seq("WAIT"): 4403 wait = self._parse_primary() 4404 elif self._match_text_seq("SKIP", "LOCKED"): 4405 wait = False 4406 4407 locks.append( 4408 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4409 ) 4410 4411 return locks 4412 4413 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4414 while this and self._match_set(self.SET_OPERATIONS): 4415 token_type = self._prev.token_type 4416 4417 if token_type == TokenType.UNION: 4418 operation: t.Type[exp.SetOperation] = exp.Union 4419 elif token_type == TokenType.EXCEPT: 4420 operation = exp.Except 4421 else: 4422 operation = exp.Intersect 4423 4424 comments = self._prev.comments 4425 4426 if self._match(TokenType.DISTINCT): 4427 distinct: t.Optional[bool] = True 4428 elif self._match(TokenType.ALL): 4429 distinct = False 4430 else: 4431 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4432 if distinct is None: 4433 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4434 4435 by_name = self._match_text_seq("BY", "NAME") 4436 expression = self._parse_select(nested=True, parse_set_operation=False) 4437 4438 this = self.expression( 4439 operation, 4440 comments=comments, 4441 this=this, 4442 distinct=distinct, 4443 by_name=by_name, 4444 expression=expression, 4445 ) 4446 4447 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4448 expression = this.expression 4449 4450 if expression: 4451 for arg in self.SET_OP_MODIFIERS: 4452 expr = expression.args.get(arg) 4453 if expr: 4454 this.set(arg, expr.pop()) 4455 4456 return this 4457 4458 def _parse_expression(self) -> t.Optional[exp.Expression]: 4459 return self._parse_alias(self._parse_assignment()) 4460 4461 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4462 this = self._parse_disjunction() 4463 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4464 # This allows us to parse <non-identifier token> := <expr> 4465 this = exp.column( 4466 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4467 ) 4468 4469 while self._match_set(self.ASSIGNMENT): 4470 if isinstance(this, exp.Column) and len(this.parts) == 1: 4471 this = this.this 4472 4473 this = self.expression( 4474 self.ASSIGNMENT[self._prev.token_type], 4475 this=this, 4476 comments=self._prev_comments, 4477 expression=self._parse_assignment(), 4478 ) 4479 4480 return this 4481 4482 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4483 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4484 4485 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4486 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4487 4488 def _parse_equality(self) -> t.Optional[exp.Expression]: 4489 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4490 4491 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4492 return self._parse_tokens(self._parse_range, self.COMPARISON) 4493 4494 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4495 this = this or self._parse_bitwise() 4496 negate = 
self._match(TokenType.NOT) 4497 4498 if self._match_set(self.RANGE_PARSERS): 4499 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4500 if not expression: 4501 return this 4502 4503 this = expression 4504 elif self._match(TokenType.ISNULL): 4505 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4506 4507 # Postgres supports ISNULL and NOTNULL for conditions. 4508 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4509 if self._match(TokenType.NOTNULL): 4510 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4511 this = self.expression(exp.Not, this=this) 4512 4513 if negate: 4514 this = self._negate_range(this) 4515 4516 if self._match(TokenType.IS): 4517 this = self._parse_is(this) 4518 4519 return this 4520 4521 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4522 if not this: 4523 return this 4524 4525 return self.expression(exp.Not, this=this) 4526 4527 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4528 index = self._index - 1 4529 negate = self._match(TokenType.NOT) 4530 4531 if self._match_text_seq("DISTINCT", "FROM"): 4532 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4533 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4534 4535 if self._match(TokenType.JSON): 4536 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4537 4538 if self._match_text_seq("WITH"): 4539 _with = True 4540 elif self._match_text_seq("WITHOUT"): 4541 _with = False 4542 else: 4543 _with = None 4544 4545 unique = self._match(TokenType.UNIQUE) 4546 self._match_text_seq("KEYS") 4547 expression: t.Optional[exp.Expression] = self.expression( 4548 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4549 ) 4550 else: 4551 expression = self._parse_primary() or self._parse_null() 4552 if not expression: 4553 self._retreat(index) 4554 return None 4555 4556 this = self.expression(exp.Is, this=this, expression=expression) 4557 return self.expression(exp.Not, this=this) if negate else this 4558 4559 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4560 unnest = self._parse_unnest(with_alias=False) 4561 if unnest: 4562 this = self.expression(exp.In, this=this, unnest=unnest) 4563 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4564 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4565 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4566 4567 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4568 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4569 else: 4570 this = self.expression(exp.In, this=this, expressions=expressions) 4571 4572 if matched_l_paren: 4573 self._match_r_paren(this) 4574 elif not self._match(TokenType.R_BRACKET, expression=this): 4575 self.raise_error("Expecting ]") 4576 else: 4577 this = self.expression(exp.In, this=this, field=self._parse_column()) 4578 4579 return this 4580 4581 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4582 low = self._parse_bitwise() 4583 self._match(TokenType.AND) 4584 high = self._parse_bitwise() 4585 return self.expression(exp.Between, this=this, low=low, high=high) 4586 4587 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4588 if not self._match(TokenType.ESCAPE): 4589 return this 4590 return self.expression(exp.Escape, this=this, 
expression=self._parse_string()) 4591 4592 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4593 index = self._index 4594 4595 if not self._match(TokenType.INTERVAL) and match_interval: 4596 return None 4597 4598 if self._match(TokenType.STRING, advance=False): 4599 this = self._parse_primary() 4600 else: 4601 this = self._parse_term() 4602 4603 if not this or ( 4604 isinstance(this, exp.Column) 4605 and not this.table 4606 and not this.this.quoted 4607 and this.name.upper() == "IS" 4608 ): 4609 self._retreat(index) 4610 return None 4611 4612 unit = self._parse_function() or ( 4613 not self._match(TokenType.ALIAS, advance=False) 4614 and self._parse_var(any_token=True, upper=True) 4615 ) 4616 4617 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4618 # each INTERVAL expression into this canonical form so it's easy to transpile 4619 if this and this.is_number: 4620 this = exp.Literal.string(this.to_py()) 4621 elif this and this.is_string: 4622 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4623 if parts and unit: 4624 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4625 unit = None 4626 self._retreat(self._index - 1) 4627 4628 if len(parts) == 1: 4629 this = exp.Literal.string(parts[0][0]) 4630 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4631 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4632 unit = self.expression( 4633 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4634 ) 4635 4636 interval = self.expression(exp.Interval, this=this, unit=unit) 4637 4638 index = self._index 4639 self._match(TokenType.PLUS) 4640 4641 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4642 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4643 return self.expression( 4644 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4645 ) 4646 4647 self._retreat(index) 4648 return interval 4649 4650 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4651 this = self._parse_term() 4652 4653 while True: 4654 if self._match_set(self.BITWISE): 4655 this = self.expression( 4656 self.BITWISE[self._prev.token_type], 4657 this=this, 4658 expression=self._parse_term(), 4659 ) 4660 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4661 this = self.expression( 4662 exp.DPipe, 4663 this=this, 4664 expression=self._parse_term(), 4665 safe=not self.dialect.STRICT_STRING_CONCAT, 4666 ) 4667 elif self._match(TokenType.DQMARK): 4668 this = self.expression( 4669 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4670 ) 4671 elif self._match_pair(TokenType.LT, TokenType.LT): 4672 this = self.expression( 4673 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4674 ) 4675 elif self._match_pair(TokenType.GT, TokenType.GT): 4676 this = self.expression( 4677 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4678 ) 4679 else: 4680 break 4681 4682 return this 4683 4684 def _parse_term(self) -> t.Optional[exp.Expression]: 4685 this = self._parse_factor() 4686 4687 while self._match_set(self.TERM): 4688 klass = self.TERM[self._prev.token_type] 4689 comments = self._prev_comments 4690 expression = self._parse_factor() 4691 4692 this = self.expression(klass, this=this, comments=comments, expression=expression) 4693 4694 if isinstance(this, exp.Collate): 4695 expr = this.expression 4696 4697 # Preserve collations such as 
pg_catalog."default" (Postgres) as columns, otherwise
4698            # fallback to Identifier / Var
4699            if isinstance(expr, exp.Column) and len(expr.parts) == 1:
4700                ident = expr.this
4701                if isinstance(ident, exp.Identifier):
4702                    this.set("expression", ident if ident.quoted else exp.var(ident.name))
4703
4704        return this
4705
4706    def _parse_factor(self) -> t.Optional[exp.Expression]:
4707        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
4708        this = parse_method()
4709
4710        while self._match_set(self.FACTOR):
4711            klass = self.FACTOR[self._prev.token_type]
4712            comments = self._prev_comments
4713            expression = parse_method()
4714
4715            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
4716                self._retreat(self._index - 1)
4717                return this
4718
4719            this = self.expression(klass, this=this, comments=comments, expression=expression)
4720
4721        if isinstance(this, exp.Div):
4722            this.args["typed"] = self.dialect.TYPED_DIVISION
4723            this.args["safe"] = self.dialect.SAFE_DIVISION
4724
4725        return this
4726
4727    def _parse_exponent(self) -> t.Optional[exp.Expression]:
4728        return self._parse_tokens(self._parse_unary, self.EXPONENT)
4729
4730    def _parse_unary(self) -> t.Optional[exp.Expression]:
4731        if self._match_set(self.UNARY_PARSERS):
4732            return self.UNARY_PARSERS[self._prev.token_type](self)
4733        return self._parse_at_time_zone(self._parse_type())
4734
4735    def _parse_type(
4736        self, parse_interval: bool = True, fallback_to_identifier: bool = False
4737    ) -> t.Optional[exp.Expression]:
4738        interval = parse_interval and self._parse_interval()
4739        if interval:
4740            return interval
4741
4742        index = self._index
4743        data_type = self._parse_types(check_func=True, allow_identifiers=False)
4744
4745        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
4746        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
4747        if isinstance(data_type, exp.Cast):
4748            # This constructor can contain ops directly after it, for instance struct unnesting:
4749            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
4750            return self._parse_column_ops(data_type)
4751
4752        if data_type:
4753            index2 = self._index
4754            this = self._parse_primary()
4755
4756            if isinstance(this, exp.Literal):
4757                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
4758                if parser:
4759                    return parser(self, this, data_type)
4760
4761                return self.expression(exp.Cast, this=this, to=data_type)
4762
4763            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
4764            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
4765            #
4766            # If the index difference here is greater than 1, that means the parser itself must have
4767            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
4768            #
4769            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
4770            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
4771            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
4772            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
4773            #
4774            # In these cases, we don't really want to return the converted type, but instead retreat
4775            # and try to parse a Column or Identifier in the section below.
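            # A concrete instance of the note above, assuming the Snowflake converter
            # behavior it describes; the output is expected to render along these lines:
            #   >>> import sqlglot
            #   >>> sqlglot.parse_one("SELECT CAST(a AS DECIMAL)", read="snowflake").sql(dialect="snowflake")
            #   'SELECT CAST(a AS DECIMAL(38, 0))'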
4776 if data_type.expressions and index2 - index > 1: 4777 self._retreat(index2) 4778 return self._parse_column_ops(data_type) 4779 4780 self._retreat(index) 4781 4782 if fallback_to_identifier: 4783 return self._parse_id_var() 4784 4785 this = self._parse_column() 4786 return this and self._parse_column_ops(this) 4787 4788 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4789 this = self._parse_type() 4790 if not this: 4791 return None 4792 4793 if isinstance(this, exp.Column) and not this.table: 4794 this = exp.var(this.name.upper()) 4795 4796 return self.expression( 4797 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4798 ) 4799 4800 def _parse_types( 4801 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4802 ) -> t.Optional[exp.Expression]: 4803 index = self._index 4804 4805 this: t.Optional[exp.Expression] = None 4806 prefix = self._match_text_seq("SYSUDTLIB", ".") 4807 4808 if not self._match_set(self.TYPE_TOKENS): 4809 identifier = allow_identifiers and self._parse_id_var( 4810 any_token=False, tokens=(TokenType.VAR,) 4811 ) 4812 if isinstance(identifier, exp.Identifier): 4813 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4814 4815 if len(tokens) != 1: 4816 self.raise_error("Unexpected identifier", self._prev) 4817 4818 if tokens[0].token_type in self.TYPE_TOKENS: 4819 self._prev = tokens[0] 4820 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4821 type_name = identifier.name 4822 4823 while self._match(TokenType.DOT): 4824 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4825 4826 this = exp.DataType.build(type_name, udt=True) 4827 else: 4828 self._retreat(self._index - 1) 4829 return None 4830 else: 4831 return None 4832 4833 type_token = self._prev.token_type 4834 4835 if type_token == TokenType.PSEUDO_TYPE: 4836 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4837 4838 if type_token == TokenType.OBJECT_IDENTIFIER: 4839 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4840 4841 # https://materialize.com/docs/sql/types/map/ 4842 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4843 key_type = self._parse_types( 4844 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4845 ) 4846 if not self._match(TokenType.FARROW): 4847 self._retreat(index) 4848 return None 4849 4850 value_type = self._parse_types( 4851 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4852 ) 4853 if not self._match(TokenType.R_BRACKET): 4854 self._retreat(index) 4855 return None 4856 4857 return exp.DataType( 4858 this=exp.DataType.Type.MAP, 4859 expressions=[key_type, value_type], 4860 nested=True, 4861 prefix=prefix, 4862 ) 4863 4864 nested = type_token in self.NESTED_TYPE_TOKENS 4865 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4866 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4867 expressions = None 4868 maybe_func = False 4869 4870 if self._match(TokenType.L_PAREN): 4871 if is_struct: 4872 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4873 elif nested: 4874 expressions = self._parse_csv( 4875 lambda: self._parse_types( 4876 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4877 ) 4878 ) 4879 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4880 this = expressions[0] 4881 this.set("nullable", True) 4882 self._match_r_paren() 4883 return this 4884 elif type_token in self.ENUM_TYPE_TOKENS: 4885 
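                # e.g. ClickHouse's Enum8('a' = 1, 'b' = 2): each member is an
                # equality expression, hence the CSV of equalities parsed below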
expressions = self._parse_csv(self._parse_equality) 4886 elif is_aggregate: 4887 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4888 any_token=False, tokens=(TokenType.VAR,) 4889 ) 4890 if not func_or_ident or not self._match(TokenType.COMMA): 4891 return None 4892 expressions = self._parse_csv( 4893 lambda: self._parse_types( 4894 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4895 ) 4896 ) 4897 expressions.insert(0, func_or_ident) 4898 else: 4899 expressions = self._parse_csv(self._parse_type_size) 4900 4901 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4902 if type_token == TokenType.VECTOR and len(expressions) == 2: 4903 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4904 4905 if not expressions or not self._match(TokenType.R_PAREN): 4906 self._retreat(index) 4907 return None 4908 4909 maybe_func = True 4910 4911 values: t.Optional[t.List[exp.Expression]] = None 4912 4913 if nested and self._match(TokenType.LT): 4914 if is_struct: 4915 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4916 else: 4917 expressions = self._parse_csv( 4918 lambda: self._parse_types( 4919 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4920 ) 4921 ) 4922 4923 if not self._match(TokenType.GT): 4924 self.raise_error("Expecting >") 4925 4926 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4927 values = self._parse_csv(self._parse_assignment) 4928 if not values and is_struct: 4929 values = None 4930 self._retreat(self._index - 1) 4931 else: 4932 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4933 4934 if type_token in self.TIMESTAMPS: 4935 if self._match_text_seq("WITH", "TIME", "ZONE"): 4936 maybe_func = False 4937 tz_type = ( 4938 exp.DataType.Type.TIMETZ 4939 if type_token in self.TIMES 4940 else exp.DataType.Type.TIMESTAMPTZ 4941 ) 4942 this = exp.DataType(this=tz_type, expressions=expressions) 4943 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4944 maybe_func = False 4945 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4946 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4947 maybe_func = False 4948 elif type_token == TokenType.INTERVAL: 4949 unit = self._parse_var(upper=True) 4950 if unit: 4951 if self._match_text_seq("TO"): 4952 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4953 4954 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4955 else: 4956 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4957 4958 if maybe_func and check_func: 4959 index2 = self._index 4960 peek = self._parse_string() 4961 4962 if not peek: 4963 self._retreat(index) 4964 return None 4965 4966 self._retreat(index2) 4967 4968 if not this: 4969 if self._match_text_seq("UNSIGNED"): 4970 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4971 if not unsigned_type_token: 4972 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4973 4974 type_token = unsigned_type_token or type_token 4975 4976 this = exp.DataType( 4977 this=exp.DataType.Type[type_token.value], 4978 expressions=expressions, 4979 nested=nested, 4980 prefix=prefix, 4981 ) 4982 4983 # Empty arrays/structs are allowed 4984 if values is not None: 4985 cls = exp.Struct if is_struct else exp.Array 4986 this = exp.cast(cls(expressions=values), this, copy=False) 4987 4988 elif expressions: 4989 this.set("expressions", 
expressions) 4990 4991 # https://materialize.com/docs/sql/types/list/#type-name 4992 while self._match(TokenType.LIST): 4993 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4994 4995 index = self._index 4996 4997 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4998 matched_array = self._match(TokenType.ARRAY) 4999 5000 while self._curr: 5001 datatype_token = self._prev.token_type 5002 matched_l_bracket = self._match(TokenType.L_BRACKET) 5003 if not matched_l_bracket and not matched_array: 5004 break 5005 5006 matched_array = False 5007 values = self._parse_csv(self._parse_assignment) or None 5008 if ( 5009 values 5010 and not schema 5011 and ( 5012 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5013 ) 5014 ): 5015 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5016 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5017 self._retreat(index) 5018 break 5019 5020 this = exp.DataType( 5021 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5022 ) 5023 self._match(TokenType.R_BRACKET) 5024 5025 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5026 converter = self.TYPE_CONVERTERS.get(this.this) 5027 if converter: 5028 this = converter(t.cast(exp.DataType, this)) 5029 5030 return this 5031 5032 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5033 index = self._index 5034 5035 if ( 5036 self._curr 5037 and self._next 5038 and self._curr.token_type in self.TYPE_TOKENS 5039 and self._next.token_type in self.TYPE_TOKENS 5040 ): 5041 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5042 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5043 this = self._parse_id_var() 5044 else: 5045 this = ( 5046 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5047 or self._parse_id_var() 5048 ) 5049 5050 self._match(TokenType.COLON) 5051 5052 if ( 5053 type_required 5054 and not isinstance(this, exp.DataType) 5055 and not self._match_set(self.TYPE_TOKENS, advance=False) 5056 ): 5057 self._retreat(index) 5058 return self._parse_types() 5059 5060 return self._parse_column_def(this) 5061 5062 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5063 if not self._match_text_seq("AT", "TIME", "ZONE"): 5064 return this 5065 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5066 5067 def _parse_column(self) -> t.Optional[exp.Expression]: 5068 this = self._parse_column_reference() 5069 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5070 5071 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5072 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5073 5074 return column 5075 5076 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5077 this = self._parse_field() 5078 if ( 5079 not this 5080 and self._match(TokenType.VALUES, advance=False) 5081 and self.VALUES_FOLLOWED_BY_PAREN 5082 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5083 ): 5084 this = self._parse_id_var() 5085 5086 if isinstance(this, exp.Identifier): 5087 # We bubble up comments from the Identifier to the Column 5088 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5089 5090 return this 5091 5092 def _parse_colon_as_variant_extract( 5093 self, this: t.Optional[exp.Expression] 5094 ) -> t.Optional[exp.Expression]: 5095 casts = [] 5096 json_path = [] 5097 escape = None 5098 5099 while self._match(TokenType.COLON): 5100 start_index = self._index 5101 5102 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5103 path = self._parse_column_ops( 5104 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5105 ) 5106 5107 # The cast :: operator has a lower precedence than the extraction operator :, so 5108 # we rearrange the AST appropriately to avoid casting the JSON path 5109 while isinstance(path, exp.Cast): 5110 casts.append(path.to) 5111 path = path.this 5112 5113 if casts: 5114 dcolon_offset = next( 5115 i 5116 for i, t in enumerate(self._tokens[start_index:]) 5117 if t.token_type == TokenType.DCOLON 5118 ) 5119 end_token = self._tokens[start_index + dcolon_offset - 1] 5120 else: 5121 end_token = self._prev 5122 5123 if path: 5124 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5125 # it'll roundtrip to a string literal in GET_PATH 5126 if isinstance(path, exp.Identifier) and path.quoted: 5127 escape = True 5128 5129 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5130 5131 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5132 # Databricks transforms it back to the colon/dot notation 5133 if json_path: 5134 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5135 5136 if json_path_expr: 5137 json_path_expr.set("escape", escape) 5138 5139 this = self.expression( 5140 exp.JSONExtract, 5141 this=this, 5142 expression=json_path_expr, 5143 variant_extract=True, 5144 ) 5145 5146 while casts: 5147 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5148 5149 return this 5150 5151 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5152 return self._parse_types() 5153 5154 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5155 this = self._parse_bracket(this) 5156 5157 while self._match_set(self.COLUMN_OPERATORS): 5158 op_token = self._prev.token_type 5159 op = self.COLUMN_OPERATORS.get(op_token) 5160 5161 if op_token == TokenType.DCOLON: 5162 field = self._parse_dcolon() 5163 if not field: 5164 self.raise_error("Expected type") 5165 elif op and self._curr: 5166 field = self._parse_column_reference() or self._parse_bracket() 5167 else: 5168 field = self._parse_field(any_token=True, anonymous_func=True) 5169 5170 if isinstance(field, (exp.Func, exp.Window)) and this: 5171 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5172 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5173 this = exp.replace_tree( 5174 this, 5175 lambda n: ( 5176 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5177 if n.table 5178 else n.this 5179 ) 5180 if isinstance(n, exp.Column) 5181 else n, 5182 ) 5183 5184 if op: 5185 this = op(self, this, field) 5186 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5187 this = self.expression( 5188 exp.Column, 5189 comments=this.comments, 5190 this=field, 5191 table=this.this, 5192 db=this.args.get("table"), 5193 catalog=this.args.get("db"), 5194 ) 5195 elif isinstance(field, exp.Window): 5196 # Move the exp.Dot's to the window's function 5197 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5198 field.set("this", window_func) 5199 this = field 5200 else: 5201 this = self.expression(exp.Dot, this=this, expression=field) 5202 5203 if field and field.comments: 5204 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5205 5206 this = self._parse_bracket(this) 5207 5208 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5209 5210 def _parse_primary(self) -> t.Optional[exp.Expression]: 5211 if self._match_set(self.PRIMARY_PARSERS): 5212 token_type = self._prev.token_type 5213 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5214 5215 if token_type == TokenType.STRING: 5216 expressions = [primary] 5217 while self._match(TokenType.STRING): 5218 expressions.append(exp.Literal.string(self._prev.text)) 5219 5220 if len(expressions) > 1: 5221 return self.expression(exp.Concat, expressions=expressions) 5222 5223 return primary 5224 5225 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5226 return exp.Literal.number(f"0.{self._prev.text}") 5227 5228 if 
self._match(TokenType.L_PAREN): 5229 comments = self._prev_comments 5230 query = self._parse_select() 5231 5232 if query: 5233 expressions = [query] 5234 else: 5235 expressions = self._parse_expressions() 5236 5237 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5238 5239 if not this and self._match(TokenType.R_PAREN, advance=False): 5240 this = self.expression(exp.Tuple) 5241 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5242 this = self._parse_subquery(this=this, parse_alias=False) 5243 elif isinstance(this, exp.Subquery): 5244 this = self._parse_subquery( 5245 this=self._parse_set_operations(this), parse_alias=False 5246 ) 5247 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5248 this = self.expression(exp.Tuple, expressions=expressions) 5249 else: 5250 this = self.expression(exp.Paren, this=this) 5251 5252 if this: 5253 this.add_comments(comments) 5254 5255 self._match_r_paren(expression=this) 5256 return this 5257 5258 return None 5259 5260 def _parse_field( 5261 self, 5262 any_token: bool = False, 5263 tokens: t.Optional[t.Collection[TokenType]] = None, 5264 anonymous_func: bool = False, 5265 ) -> t.Optional[exp.Expression]: 5266 if anonymous_func: 5267 field = ( 5268 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5269 or self._parse_primary() 5270 ) 5271 else: 5272 field = self._parse_primary() or self._parse_function( 5273 anonymous=anonymous_func, any_token=any_token 5274 ) 5275 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5276 5277 def _parse_function( 5278 self, 5279 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5280 anonymous: bool = False, 5281 optional_parens: bool = True, 5282 any_token: bool = False, 5283 ) -> t.Optional[exp.Expression]: 5284 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5285 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5286 fn_syntax = False 5287 if ( 5288 self._match(TokenType.L_BRACE, advance=False) 5289 and self._next 5290 and self._next.text.upper() == "FN" 5291 ): 5292 self._advance(2) 5293 fn_syntax = True 5294 5295 func = self._parse_function_call( 5296 functions=functions, 5297 anonymous=anonymous, 5298 optional_parens=optional_parens, 5299 any_token=any_token, 5300 ) 5301 5302 if fn_syntax: 5303 self._match(TokenType.R_BRACE) 5304 5305 return func 5306 5307 def _parse_function_call( 5308 self, 5309 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5310 anonymous: bool = False, 5311 optional_parens: bool = True, 5312 any_token: bool = False, 5313 ) -> t.Optional[exp.Expression]: 5314 if not self._curr: 5315 return None 5316 5317 comments = self._curr.comments 5318 token_type = self._curr.token_type 5319 this = self._curr.text 5320 upper = this.upper() 5321 5322 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5323 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5324 self._advance() 5325 return self._parse_window(parser(self)) 5326 5327 if not self._next or self._next.token_type != TokenType.L_PAREN: 5328 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5329 self._advance() 5330 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5331 5332 return None 5333 5334 if any_token: 5335 if token_type in self.RESERVED_TOKENS: 5336 return None 5337 elif token_type not in self.FUNC_TOKENS: 5338 return None 5339 5340 self._advance(2) 5341 5342 parser = self.FUNCTION_PARSERS.get(upper) 5343 if parser and not anonymous: 5344 this = parser(self) 
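            # FUNCTION_PARSERS covers functions whose arguments use dedicated syntax (e.g.
            # CAST(x AS INT) or EXTRACT(YEAR FROM x)); anything else falls through to the
            # generic CSV-style argument parsing in the else branch below.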
5345 else: 5346 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5347 5348 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5349 this = self.expression( 5350 subquery_predicate, comments=comments, this=self._parse_select() 5351 ) 5352 self._match_r_paren() 5353 return this 5354 5355 if functions is None: 5356 functions = self.FUNCTIONS 5357 5358 function = functions.get(upper) 5359 known_function = function and not anonymous 5360 5361 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5362 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5363 5364 if alias and known_function: 5365 args = self._kv_to_prop_eq(args) 5366 5367 if known_function: 5368 func_builder = t.cast(t.Callable, function) 5369 5370 if "dialect" in func_builder.__code__.co_varnames: 5371 func = func_builder(args, dialect=self.dialect) 5372 else: 5373 func = func_builder(args) 5374 5375 func = self.validate_expression(func, args) 5376 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5377 func.meta["name"] = this 5378 5379 this = func 5380 else: 5381 if token_type == TokenType.IDENTIFIER: 5382 this = exp.Identifier(this=this, quoted=True) 5383 this = self.expression(exp.Anonymous, this=this, expressions=args) 5384 5385 if isinstance(this, exp.Expression): 5386 this.add_comments(comments) 5387 5388 self._match_r_paren(this) 5389 return self._parse_window(this) 5390 5391 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5392 return expression 5393 5394 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5395 transformed = [] 5396 5397 for index, e in enumerate(expressions): 5398 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5399 if isinstance(e, exp.Alias): 5400 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5401 5402 if not isinstance(e, exp.PropertyEQ): 5403 e = self.expression( 5404 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5405 ) 5406 5407 if isinstance(e.this, exp.Column): 5408 e.this.replace(e.this.this) 5409 else: 5410 e = self._to_prop_eq(e, index) 5411 5412 transformed.append(e) 5413 5414 return transformed 5415 5416 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5417 return self._parse_statement() 5418 5419 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5420 return self._parse_column_def(self._parse_id_var()) 5421 5422 def _parse_user_defined_function( 5423 self, kind: t.Optional[TokenType] = None 5424 ) -> t.Optional[exp.Expression]: 5425 this = self._parse_id_var() 5426 5427 while self._match(TokenType.DOT): 5428 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5429 5430 if not self._match(TokenType.L_PAREN): 5431 return this 5432 5433 expressions = self._parse_csv(self._parse_function_parameter) 5434 self._match_r_paren() 5435 return self.expression( 5436 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5437 ) 5438 5439 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5440 literal = self._parse_primary() 5441 if literal: 5442 return self.expression(exp.Introducer, this=token.text, expression=literal) 5443 5444 return self.expression(exp.Identifier, this=token.text) 5445 5446 def _parse_session_parameter(self) -> exp.SessionParameter: 5447 kind = None 5448 this = self._parse_id_var() or self._parse_primary() 5449 5450 if this and self._match(TokenType.DOT): 5451 kind = 
this.name 5452 this = self._parse_var() or self._parse_primary() 5453 5454 return self.expression(exp.SessionParameter, this=this, kind=kind) 5455 5456 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5457 return self._parse_id_var() 5458 5459 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5460 index = self._index 5461 5462 if self._match(TokenType.L_PAREN): 5463 expressions = t.cast( 5464 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5465 ) 5466 5467 if not self._match(TokenType.R_PAREN): 5468 self._retreat(index) 5469 else: 5470 expressions = [self._parse_lambda_arg()] 5471 5472 if self._match_set(self.LAMBDAS): 5473 return self.LAMBDAS[self._prev.token_type](self, expressions) 5474 5475 self._retreat(index) 5476 5477 this: t.Optional[exp.Expression] 5478 5479 if self._match(TokenType.DISTINCT): 5480 this = self.expression( 5481 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5482 ) 5483 else: 5484 this = self._parse_select_or_expression(alias=alias) 5485 5486 return self._parse_limit( 5487 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5488 ) 5489 5490 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5491 index = self._index 5492 if not self._match(TokenType.L_PAREN): 5493 return this 5494 5495 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5496 # expr can be of both types 5497 if self._match_set(self.SELECT_START_TOKENS): 5498 self._retreat(index) 5499 return this 5500 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5501 self._match_r_paren() 5502 return self.expression(exp.Schema, this=this, expressions=args) 5503 5504 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5505 return self._parse_column_def(self._parse_field(any_token=True)) 5506 5507 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5508 # column defs are not really columns, they're identifiers 5509 if isinstance(this, exp.Column): 5510 this = this.this 5511 5512 kind = self._parse_types(schema=True) 5513 5514 if self._match_text_seq("FOR", "ORDINALITY"): 5515 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5516 5517 constraints: t.List[exp.Expression] = [] 5518 5519 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5520 ("ALIAS", "MATERIALIZED") 5521 ): 5522 persisted = self._prev.text.upper() == "MATERIALIZED" 5523 constraint_kind = exp.ComputedColumnConstraint( 5524 this=self._parse_assignment(), 5525 persisted=persisted or self._match_text_seq("PERSISTED"), 5526 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5527 ) 5528 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5529 elif ( 5530 kind 5531 and self._match(TokenType.ALIAS, advance=False) 5532 and ( 5533 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5534 or (self._next and self._next.token_type == TokenType.L_PAREN) 5535 ) 5536 ): 5537 self._advance() 5538 constraints.append( 5539 self.expression( 5540 exp.ColumnConstraint, 5541 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5542 ) 5543 ) 5544 5545 while True: 5546 constraint = self._parse_column_constraint() 5547 if not constraint: 5548 break 5549 constraints.append(constraint) 5550 5551 if not kind and not constraints: 5552 return this 5553 5554 return self.expression(exp.ColumnDef, this=this, kind=kind, 
constraints=constraints) 5555 5556 def _parse_auto_increment( 5557 self, 5558 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5559 start = None 5560 increment = None 5561 5562 if self._match(TokenType.L_PAREN, advance=False): 5563 args = self._parse_wrapped_csv(self._parse_bitwise) 5564 start = seq_get(args, 0) 5565 increment = seq_get(args, 1) 5566 elif self._match_text_seq("START"): 5567 start = self._parse_bitwise() 5568 self._match_text_seq("INCREMENT") 5569 increment = self._parse_bitwise() 5570 5571 if start and increment: 5572 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5573 5574 return exp.AutoIncrementColumnConstraint() 5575 5576 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5577 if not self._match_text_seq("REFRESH"): 5578 self._retreat(self._index - 1) 5579 return None 5580 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5581 5582 def _parse_compress(self) -> exp.CompressColumnConstraint: 5583 if self._match(TokenType.L_PAREN, advance=False): 5584 return self.expression( 5585 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5586 ) 5587 5588 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5589 5590 def _parse_generated_as_identity( 5591 self, 5592 ) -> ( 5593 exp.GeneratedAsIdentityColumnConstraint 5594 | exp.ComputedColumnConstraint 5595 | exp.GeneratedAsRowColumnConstraint 5596 ): 5597 if self._match_text_seq("BY", "DEFAULT"): 5598 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5599 this = self.expression( 5600 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5601 ) 5602 else: 5603 self._match_text_seq("ALWAYS") 5604 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5605 5606 self._match(TokenType.ALIAS) 5607 5608 if self._match_text_seq("ROW"): 5609 start = self._match_text_seq("START") 5610 if not start: 5611 self._match(TokenType.END) 5612 hidden = self._match_text_seq("HIDDEN") 5613 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5614 5615 identity = self._match_text_seq("IDENTITY") 5616 5617 if self._match(TokenType.L_PAREN): 5618 if self._match(TokenType.START_WITH): 5619 this.set("start", self._parse_bitwise()) 5620 if self._match_text_seq("INCREMENT", "BY"): 5621 this.set("increment", self._parse_bitwise()) 5622 if self._match_text_seq("MINVALUE"): 5623 this.set("minvalue", self._parse_bitwise()) 5624 if self._match_text_seq("MAXVALUE"): 5625 this.set("maxvalue", self._parse_bitwise()) 5626 5627 if self._match_text_seq("CYCLE"): 5628 this.set("cycle", True) 5629 elif self._match_text_seq("NO", "CYCLE"): 5630 this.set("cycle", False) 5631 5632 if not identity: 5633 this.set("expression", self._parse_range()) 5634 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5635 args = self._parse_csv(self._parse_bitwise) 5636 this.set("start", seq_get(args, 0)) 5637 this.set("increment", seq_get(args, 1)) 5638 5639 self._match_r_paren() 5640 5641 return this 5642 5643 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5644 self._match_text_seq("LENGTH") 5645 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5646 5647 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5648 if self._match_text_seq("NULL"): 5649 return self.expression(exp.NotNullColumnConstraint) 5650 if self._match_text_seq("CASESPECIFIC"): 5651 
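            # [NOT] CASESPECIFIC is Teradata's column attribute controlling case sensitivity in comparisons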
return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5652 if self._match_text_seq("FOR", "REPLICATION"): 5653 return self.expression(exp.NotForReplicationColumnConstraint) 5654 5655 # Unconsume the `NOT` token 5656 self._retreat(self._index - 1) 5657 return None 5658 5659 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5660 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5661 5662 procedure_option_follows = ( 5663 self._match(TokenType.WITH, advance=False) 5664 and self._next 5665 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5666 ) 5667 5668 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5669 return self.expression( 5670 exp.ColumnConstraint, 5671 this=this, 5672 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5673 ) 5674 5675 return this 5676 5677 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5678 if not self._match(TokenType.CONSTRAINT): 5679 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5680 5681 return self.expression( 5682 exp.Constraint, 5683 this=self._parse_id_var(), 5684 expressions=self._parse_unnamed_constraints(), 5685 ) 5686 5687 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5688 constraints = [] 5689 while True: 5690 constraint = self._parse_unnamed_constraint() or self._parse_function() 5691 if not constraint: 5692 break 5693 constraints.append(constraint) 5694 5695 return constraints 5696 5697 def _parse_unnamed_constraint( 5698 self, constraints: t.Optional[t.Collection[str]] = None 5699 ) -> t.Optional[exp.Expression]: 5700 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5701 constraints or self.CONSTRAINT_PARSERS 5702 ): 5703 return None 5704 5705 constraint = self._prev.text.upper() 5706 if constraint not in self.CONSTRAINT_PARSERS: 5707 self.raise_error(f"No parser found for schema constraint {constraint}.") 5708 5709 return self.CONSTRAINT_PARSERS[constraint](self) 5710 5711 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5712 return self._parse_id_var(any_token=False) 5713 5714 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5715 self._match_text_seq("KEY") 5716 return self.expression( 5717 exp.UniqueColumnConstraint, 5718 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5719 this=self._parse_schema(self._parse_unique_key()), 5720 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5721 on_conflict=self._parse_on_conflict(), 5722 ) 5723 5724 def _parse_key_constraint_options(self) -> t.List[str]: 5725 options = [] 5726 while True: 5727 if not self._curr: 5728 break 5729 5730 if self._match(TokenType.ON): 5731 action = None 5732 on = self._advance_any() and self._prev.text 5733 5734 if self._match_text_seq("NO", "ACTION"): 5735 action = "NO ACTION" 5736 elif self._match_text_seq("CASCADE"): 5737 action = "CASCADE" 5738 elif self._match_text_seq("RESTRICT"): 5739 action = "RESTRICT" 5740 elif self._match_pair(TokenType.SET, TokenType.NULL): 5741 action = "SET NULL" 5742 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5743 action = "SET DEFAULT" 5744 else: 5745 self.raise_error("Invalid key constraint") 5746 5747 options.append(f"ON {on} {action}") 5748 else: 5749 var = self._parse_var_from_options( 5750 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5751 ) 5752 if not var: 5753 break 5754 options.append(var.name) 5755 5756 return options 5757 5758 def _parse_references(self, match: bool = True) -> 
t.Optional[exp.Reference]: 5759 if match and not self._match(TokenType.REFERENCES): 5760 return None 5761 5762 expressions = None 5763 this = self._parse_table(schema=True) 5764 options = self._parse_key_constraint_options() 5765 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5766 5767 def _parse_foreign_key(self) -> exp.ForeignKey: 5768 expressions = self._parse_wrapped_id_vars() 5769 reference = self._parse_references() 5770 options = {} 5771 5772 while self._match(TokenType.ON): 5773 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5774 self.raise_error("Expected DELETE or UPDATE") 5775 5776 kind = self._prev.text.lower() 5777 5778 if self._match_text_seq("NO", "ACTION"): 5779 action = "NO ACTION" 5780 elif self._match(TokenType.SET): 5781 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5782 action = "SET " + self._prev.text.upper() 5783 else: 5784 self._advance() 5785 action = self._prev.text.upper() 5786 5787 options[kind] = action 5788 5789 return self.expression( 5790 exp.ForeignKey, 5791 expressions=expressions, 5792 reference=reference, 5793 **options, # type: ignore 5794 ) 5795 5796 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5797 return self._parse_field() 5798 5799 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5800 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5801 self._retreat(self._index - 1) 5802 return None 5803 5804 id_vars = self._parse_wrapped_id_vars() 5805 return self.expression( 5806 exp.PeriodForSystemTimeConstraint, 5807 this=seq_get(id_vars, 0), 5808 expression=seq_get(id_vars, 1), 5809 ) 5810 5811 def _parse_primary_key( 5812 self, wrapped_optional: bool = False, in_props: bool = False 5813 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5814 desc = ( 5815 self._match_set((TokenType.ASC, TokenType.DESC)) 5816 and self._prev.token_type == TokenType.DESC 5817 ) 5818 5819 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5820 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5821 5822 expressions = self._parse_wrapped_csv( 5823 self._parse_primary_key_part, optional=wrapped_optional 5824 ) 5825 options = self._parse_key_constraint_options() 5826 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5827 5828 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5829 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5830 5831 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5832 """ 5833 Parses a datetime column in ODBC format. We parse the column into the corresponding 5834 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5835 same as we did for `DATE('yyyy-mm-dd')`. 
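        The other ODBC markers are handled the same way, e.g. `{t'hh:mm:ss'}` and
        `{ts'yyyy-mm-dd hh:mm:ss'}` for time and timestamp literals; the exact mapping
        is defined by `ODBC_DATETIME_LITERALS`.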
5836 5837 Reference: 5838 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5839 """ 5840 self._match(TokenType.VAR) 5841 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5842 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5843 if not self._match(TokenType.R_BRACE): 5844 self.raise_error("Expected }") 5845 return expression 5846 5847 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5848 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5849 return this 5850 5851 bracket_kind = self._prev.token_type 5852 if ( 5853 bracket_kind == TokenType.L_BRACE 5854 and self._curr 5855 and self._curr.token_type == TokenType.VAR 5856 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5857 ): 5858 return self._parse_odbc_datetime_literal() 5859 5860 expressions = self._parse_csv( 5861 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5862 ) 5863 5864 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5865 self.raise_error("Expected ]") 5866 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5867 self.raise_error("Expected }") 5868 5869 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5870 if bracket_kind == TokenType.L_BRACE: 5871 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5872 elif not this: 5873 this = build_array_constructor( 5874 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5875 ) 5876 else: 5877 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5878 if constructor_type: 5879 return build_array_constructor( 5880 constructor_type, 5881 args=expressions, 5882 bracket_kind=bracket_kind, 5883 dialect=self.dialect, 5884 ) 5885 5886 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5887 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5888 5889 self._add_comments(this) 5890 return self._parse_bracket(this) 5891 5892 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5893 if self._match(TokenType.COLON): 5894 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5895 return this 5896 5897 def _parse_case(self) -> t.Optional[exp.Expression]: 5898 ifs = [] 5899 default = None 5900 5901 comments = self._prev_comments 5902 expression = self._parse_assignment() 5903 5904 while self._match(TokenType.WHEN): 5905 this = self._parse_assignment() 5906 self._match(TokenType.THEN) 5907 then = self._parse_assignment() 5908 ifs.append(self.expression(exp.If, this=this, true=then)) 5909 5910 if self._match(TokenType.ELSE): 5911 default = self._parse_assignment() 5912 5913 if not self._match(TokenType.END): 5914 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5915 default = exp.column("interval") 5916 else: 5917 self.raise_error("Expected END after CASE", self._prev) 5918 5919 return self.expression( 5920 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5921 ) 5922 5923 def _parse_if(self) -> t.Optional[exp.Expression]: 5924 if self._match(TokenType.L_PAREN): 5925 args = self._parse_csv(self._parse_assignment) 5926 this = self.validate_expression(exp.If.from_arg_list(args), args) 5927 self._match_r_paren() 5928 else: 5929 index = self._index - 1 5930 5931 if self.NO_PAREN_IF_COMMANDS and index == 0: 5932 
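                # A bare IF at the very start of a statement (index == 0) is procedural control
                # flow in some dialects (e.g. T-SQL), so it is preserved as an opaque exp.Command
                # instead of being parsed as an IF() conditional expression.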
return self._parse_as_command(self._prev) 5933 5934 condition = self._parse_assignment() 5935 5936 if not condition: 5937 self._retreat(index) 5938 return None 5939 5940 self._match(TokenType.THEN) 5941 true = self._parse_assignment() 5942 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5943 self._match(TokenType.END) 5944 this = self.expression(exp.If, this=condition, true=true, false=false) 5945 5946 return this 5947 5948 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5949 if not self._match_text_seq("VALUE", "FOR"): 5950 self._retreat(self._index - 1) 5951 return None 5952 5953 return self.expression( 5954 exp.NextValueFor, 5955 this=self._parse_column(), 5956 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5957 ) 5958 5959 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5960 this = self._parse_function() or self._parse_var_or_string(upper=True) 5961 5962 if self._match(TokenType.FROM): 5963 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5964 5965 if not self._match(TokenType.COMMA): 5966 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5967 5968 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5969 5970 def _parse_gap_fill(self) -> exp.GapFill: 5971 self._match(TokenType.TABLE) 5972 this = self._parse_table() 5973 5974 self._match(TokenType.COMMA) 5975 args = [this, *self._parse_csv(self._parse_lambda)] 5976 5977 gap_fill = exp.GapFill.from_arg_list(args) 5978 return self.validate_expression(gap_fill, args) 5979 5980 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5981 this = self._parse_assignment() 5982 5983 if not self._match(TokenType.ALIAS): 5984 if self._match(TokenType.COMMA): 5985 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5986 5987 self.raise_error("Expected AS after CAST") 5988 5989 fmt = None 5990 to = self._parse_types() 5991 5992 if self._match(TokenType.FORMAT): 5993 fmt_string = self._parse_string() 5994 fmt = self._parse_at_time_zone(fmt_string) 5995 5996 if not to: 5997 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5998 if to.this in exp.DataType.TEMPORAL_TYPES: 5999 this = self.expression( 6000 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6001 this=this, 6002 format=exp.Literal.string( 6003 format_time( 6004 fmt_string.this if fmt_string else "", 6005 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6006 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6007 ) 6008 ), 6009 safe=safe, 6010 ) 6011 6012 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6013 this.set("zone", fmt.args["zone"]) 6014 return this 6015 elif not to: 6016 self.raise_error("Expected TYPE after CAST") 6017 elif isinstance(to, exp.Identifier): 6018 to = exp.DataType.build(to.name, udt=True) 6019 elif to.this == exp.DataType.Type.CHAR: 6020 if self._match(TokenType.CHARACTER_SET): 6021 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6022 6023 return self.expression( 6024 exp.Cast if strict else exp.TryCast, 6025 this=this, 6026 to=to, 6027 format=fmt, 6028 safe=safe, 6029 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6030 ) 6031 6032 def _parse_string_agg(self) -> exp.GroupConcat: 6033 if self._match(TokenType.DISTINCT): 6034 args: t.List[t.Optional[exp.Expression]] = [ 6035 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
6036 ] 6037 if self._match(TokenType.COMMA): 6038 args.extend(self._parse_csv(self._parse_assignment)) 6039 else: 6040 args = self._parse_csv(self._parse_assignment) # type: ignore 6041 6042 if self._match_text_seq("ON", "OVERFLOW"): 6043 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6044 if self._match_text_seq("ERROR"): 6045 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6046 else: 6047 self._match_text_seq("TRUNCATE") 6048 on_overflow = self.expression( 6049 exp.OverflowTruncateBehavior, 6050 this=self._parse_string(), 6051 with_count=( 6052 self._match_text_seq("WITH", "COUNT") 6053 or not self._match_text_seq("WITHOUT", "COUNT") 6054 ), 6055 ) 6056 else: 6057 on_overflow = None 6058 6059 index = self._index 6060 if not self._match(TokenType.R_PAREN) and args: 6061 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6062 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6063 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6064 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6065 6066 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6067 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6068 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6069 if not self._match_text_seq("WITHIN", "GROUP"): 6070 self._retreat(index) 6071 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6072 6073 # The corresponding match_r_paren will be called in parse_function (caller) 6074 self._match_l_paren() 6075 6076 return self.expression( 6077 exp.GroupConcat, 6078 this=self._parse_order(this=seq_get(args, 0)), 6079 separator=seq_get(args, 1), 6080 on_overflow=on_overflow, 6081 ) 6082 6083 def _parse_convert( 6084 self, strict: bool, safe: t.Optional[bool] = None 6085 ) -> t.Optional[exp.Expression]: 6086 this = self._parse_bitwise() 6087 6088 if self._match(TokenType.USING): 6089 to: t.Optional[exp.Expression] = self.expression( 6090 exp.CharacterSet, this=self._parse_var() 6091 ) 6092 elif self._match(TokenType.COMMA): 6093 to = self._parse_types() 6094 else: 6095 to = None 6096 6097 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6098 6099 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6100 """ 6101 There are generally two variants of the DECODE function: 6102 6103 - DECODE(bin, charset) 6104 - DECODE(expression, search, result [, search, result] ... [, default]) 6105 6106 The second variant will always be parsed into a CASE expression. Note that NULL 6107 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6108 instead of relying on pattern matching. 
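        For example, `DECODE(x, 1, 'one', NULL, 'none', 'other')` is parsed roughly as
        `CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END`.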
6109 """ 6110 args = self._parse_csv(self._parse_assignment) 6111 6112 if len(args) < 3: 6113 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6114 6115 expression, *expressions = args 6116 if not expression: 6117 return None 6118 6119 ifs = [] 6120 for search, result in zip(expressions[::2], expressions[1::2]): 6121 if not search or not result: 6122 return None 6123 6124 if isinstance(search, exp.Literal): 6125 ifs.append( 6126 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6127 ) 6128 elif isinstance(search, exp.Null): 6129 ifs.append( 6130 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6131 ) 6132 else: 6133 cond = exp.or_( 6134 exp.EQ(this=expression.copy(), expression=search), 6135 exp.and_( 6136 exp.Is(this=expression.copy(), expression=exp.Null()), 6137 exp.Is(this=search.copy(), expression=exp.Null()), 6138 copy=False, 6139 ), 6140 copy=False, 6141 ) 6142 ifs.append(exp.If(this=cond, true=result)) 6143 6144 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6145 6146 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6147 self._match_text_seq("KEY") 6148 key = self._parse_column() 6149 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6150 self._match_text_seq("VALUE") 6151 value = self._parse_bitwise() 6152 6153 if not key and not value: 6154 return None 6155 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6156 6157 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6158 if not this or not self._match_text_seq("FORMAT", "JSON"): 6159 return this 6160 6161 return self.expression(exp.FormatJson, this=this) 6162 6163 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6164 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6165 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6166 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6167 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6168 else: 6169 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6170 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6171 6172 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6173 6174 if not empty and not error and not null: 6175 return None 6176 6177 return self.expression( 6178 exp.OnCondition, 6179 empty=empty, 6180 error=error, 6181 null=null, 6182 ) 6183 6184 def _parse_on_handling( 6185 self, on: str, *values: str 6186 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6187 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6188 for value in values: 6189 if self._match_text_seq(value, "ON", on): 6190 return f"{value} ON {on}" 6191 6192 index = self._index 6193 if self._match(TokenType.DEFAULT): 6194 default_value = self._parse_bitwise() 6195 if self._match_text_seq("ON", on): 6196 return default_value 6197 6198 self._retreat(index) 6199 6200 return None 6201 6202 @t.overload 6203 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6204 6205 @t.overload 6206 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6207 6208 def _parse_json_object(self, agg=False): 6209 star = self._parse_star() 6210 expressions = ( 6211 [star] 6212 if star 6213 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6214 ) 6215 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6216 6217 unique_keys = None 6218 if self._match_text_seq("WITH", "UNIQUE"): 6219 unique_keys = True 6220 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6221 unique_keys = False 6222 6223 self._match_text_seq("KEYS") 6224 6225 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6226 self._parse_type() 6227 ) 6228 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6229 6230 return self.expression( 6231 exp.JSONObjectAgg if agg else exp.JSONObject, 6232 expressions=expressions, 6233 null_handling=null_handling, 6234 unique_keys=unique_keys, 6235 return_type=return_type, 6236 encoding=encoding, 6237 ) 6238 6239 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6240 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6241 if not self._match_text_seq("NESTED"): 6242 this = self._parse_id_var() 6243 kind = self._parse_types(allow_identifiers=False) 6244 nested = None 6245 else: 6246 this = None 6247 kind = None 6248 nested = True 6249 6250 path = self._match_text_seq("PATH") and self._parse_string() 6251 nested_schema = nested and self._parse_json_schema() 6252 6253 return self.expression( 6254 exp.JSONColumnDef, 6255 this=this, 6256 kind=kind, 6257 path=path, 6258 nested_schema=nested_schema, 6259 ) 6260 6261 def _parse_json_schema(self) -> exp.JSONSchema: 6262 self._match_text_seq("COLUMNS") 6263 return self.expression( 6264 exp.JSONSchema, 6265 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6266 ) 6267 6268 def _parse_json_table(self) -> exp.JSONTable: 6269 this = self._parse_format_json(self._parse_bitwise()) 6270 path = self._match(TokenType.COMMA) and self._parse_string() 6271 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6272 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6273 schema = self._parse_json_schema() 6274 6275 return exp.JSONTable( 6276 this=this, 6277 schema=schema, 6278 path=path, 6279 error_handling=error_handling, 6280 empty_handling=empty_handling, 6281 ) 6282 6283 def _parse_match_against(self) -> exp.MatchAgainst: 6284 expressions = self._parse_csv(self._parse_column) 6285 6286 self._match_text_seq(")", "AGAINST", "(") 6287 6288 this = self._parse_string() 6289 6290 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6291 modifier = "IN NATURAL LANGUAGE MODE" 6292 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6293 modifier = f"{modifier} WITH QUERY EXPANSION" 6294 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6295 modifier = "IN BOOLEAN MODE" 6296 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6297 modifier = "WITH QUERY EXPANSION" 6298 else: 6299 modifier = None 6300 6301 return self.expression( 6302 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6303 ) 6304 6305 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6306 def _parse_open_json(self) -> exp.OpenJSON: 6307 this = self._parse_bitwise() 6308 path = self._match(TokenType.COMMA) and self._parse_string() 6309 6310 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6311 this = self._parse_field(any_token=True) 6312 kind = self._parse_types() 6313 path = 
self._parse_string() 6314 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6315 6316 return self.expression( 6317 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6318 ) 6319 6320 expressions = None 6321 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6322 self._match_l_paren() 6323 expressions = self._parse_csv(_parse_open_json_column_def) 6324 6325 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6326 6327 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6328 args = self._parse_csv(self._parse_bitwise) 6329 6330 if self._match(TokenType.IN): 6331 return self.expression( 6332 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6333 ) 6334 6335 if haystack_first: 6336 haystack = seq_get(args, 0) 6337 needle = seq_get(args, 1) 6338 else: 6339 needle = seq_get(args, 0) 6340 haystack = seq_get(args, 1) 6341 6342 return self.expression( 6343 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6344 ) 6345 6346 def _parse_predict(self) -> exp.Predict: 6347 self._match_text_seq("MODEL") 6348 this = self._parse_table() 6349 6350 self._match(TokenType.COMMA) 6351 self._match_text_seq("TABLE") 6352 6353 return self.expression( 6354 exp.Predict, 6355 this=this, 6356 expression=self._parse_table(), 6357 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6358 ) 6359 6360 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6361 args = self._parse_csv(self._parse_table) 6362 return exp.JoinHint(this=func_name.upper(), expressions=args) 6363 6364 def _parse_substring(self) -> exp.Substring: 6365 # Postgres supports the form: substring(string [from int] [for int]) 6366 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6367 6368 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6369 6370 if self._match(TokenType.FROM): 6371 args.append(self._parse_bitwise()) 6372 if self._match(TokenType.FOR): 6373 if len(args) == 1: 6374 args.append(exp.Literal.number(1)) 6375 args.append(self._parse_bitwise()) 6376 6377 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6378 6379 def _parse_trim(self) -> exp.Trim: 6380 # https://www.w3resource.com/sql/character-functions/trim.php 6381 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6382 6383 position = None 6384 collation = None 6385 expression = None 6386 6387 if self._match_texts(self.TRIM_TYPES): 6388 position = self._prev.text.upper() 6389 6390 this = self._parse_bitwise() 6391 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6392 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6393 expression = self._parse_bitwise() 6394 6395 if invert_order: 6396 this, expression = expression, this 6397 6398 if self._match(TokenType.COLLATE): 6399 collation = self._parse_bitwise() 6400 6401 return self.expression( 6402 exp.Trim, this=this, position=position, expression=expression, collation=collation 6403 ) 6404 6405 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6406 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6407 6408 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6409 return self._parse_window(self._parse_id_var(), alias=True) 6410 6411 def _parse_respect_or_ignore_nulls( 6412 self, this: t.Optional[exp.Expression] 6413 ) -> t.Optional[exp.Expression]: 6414 if self._match_text_seq("IGNORE", "NULLS"): 
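            # e.g. FIRST_VALUE(x IGNORE NULLS): the wrapper node records the null treatment
            # so that generators can re-emit it in the position the target dialect expects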
6415 return self.expression(exp.IgnoreNulls, this=this) 6416 if self._match_text_seq("RESPECT", "NULLS"): 6417 return self.expression(exp.RespectNulls, this=this) 6418 return this 6419 6420 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6421 if self._match(TokenType.HAVING): 6422 self._match_texts(("MAX", "MIN")) 6423 max = self._prev.text.upper() != "MIN" 6424 return self.expression( 6425 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6426 ) 6427 6428 return this 6429 6430 def _parse_window( 6431 self, this: t.Optional[exp.Expression], alias: bool = False 6432 ) -> t.Optional[exp.Expression]: 6433 func = this 6434 comments = func.comments if isinstance(func, exp.Expression) else None 6435 6436 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6437 self._match(TokenType.WHERE) 6438 this = self.expression( 6439 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6440 ) 6441 self._match_r_paren() 6442 6443 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6444 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6445 if self._match_text_seq("WITHIN", "GROUP"): 6446 order = self._parse_wrapped(self._parse_order) 6447 this = self.expression(exp.WithinGroup, this=this, expression=order) 6448 6449 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6450 # Some dialects choose to implement it and some do not. 6451 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6452 6453 # There is some code above in _parse_lambda that handles 6454 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6455 6456 # The code below handles 6457 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6458 6459 # Oracle allows both formats 6460 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6461 # and Snowflake chose to do the same for familiarity 6462 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6463 if isinstance(this, exp.AggFunc): 6464 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6465 6466 if ignore_respect and ignore_respect is not this: 6467 ignore_respect.replace(ignore_respect.this) 6468 this = self.expression(ignore_respect.__class__, this=this) 6469 6470 this = self._parse_respect_or_ignore_nulls(this) 6471 6472 # bigquery select from window x AS (partition by ...)
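        # e.g. SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y): when parsing the
        # WINDOW clause itself (via _parse_named_window), alias=True and the window
        # specification follows the AS keyword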
6473 if alias: 6474 over = None 6475 self._match(TokenType.ALIAS) 6476 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6477 return this 6478 else: 6479 over = self._prev.text.upper() 6480 6481 if comments and isinstance(func, exp.Expression): 6482 func.pop_comments() 6483 6484 if not self._match(TokenType.L_PAREN): 6485 return self.expression( 6486 exp.Window, 6487 comments=comments, 6488 this=this, 6489 alias=self._parse_id_var(False), 6490 over=over, 6491 ) 6492 6493 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6494 6495 first = self._match(TokenType.FIRST) 6496 if self._match_text_seq("LAST"): 6497 first = False 6498 6499 partition, order = self._parse_partition_and_order() 6500 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6501 6502 if kind: 6503 self._match(TokenType.BETWEEN) 6504 start = self._parse_window_spec() 6505 self._match(TokenType.AND) 6506 end = self._parse_window_spec() 6507 6508 spec = self.expression( 6509 exp.WindowSpec, 6510 kind=kind, 6511 start=start["value"], 6512 start_side=start["side"], 6513 end=end["value"], 6514 end_side=end["side"], 6515 ) 6516 else: 6517 spec = None 6518 6519 self._match_r_paren() 6520 6521 window = self.expression( 6522 exp.Window, 6523 comments=comments, 6524 this=this, 6525 partition_by=partition, 6526 order=order, 6527 spec=spec, 6528 alias=window_alias, 6529 over=over, 6530 first=first, 6531 ) 6532 6533 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6534 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6535 return self._parse_window(window, alias=alias) 6536 6537 return window 6538 6539 def _parse_partition_and_order( 6540 self, 6541 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6542 return self._parse_partition_by(), self._parse_order() 6543 6544 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6545 self._match(TokenType.BETWEEN) 6546 6547 return { 6548 "value": ( 6549 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6550 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6551 or self._parse_bitwise() 6552 ), 6553 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6554 } 6555 6556 def _parse_alias( 6557 self, this: t.Optional[exp.Expression], explicit: bool = False 6558 ) -> t.Optional[exp.Expression]: 6559 any_token = self._match(TokenType.ALIAS) 6560 comments = self._prev_comments or [] 6561 6562 if explicit and not any_token: 6563 return this 6564 6565 if self._match(TokenType.L_PAREN): 6566 aliases = self.expression( 6567 exp.Aliases, 6568 comments=comments, 6569 this=this, 6570 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6571 ) 6572 self._match_r_paren(aliases) 6573 return aliases 6574 6575 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6576 self.STRING_ALIASES and self._parse_string_as_identifier() 6577 ) 6578 6579 if alias: 6580 comments.extend(alias.pop_comments()) 6581 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6582 column = this.this 6583 6584 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6585 if not this.comments and column and column.comments: 6586 this.comments = column.pop_comments() 6587 6588 return this 6589 6590 def _parse_id_var( 6591 self, 6592 any_token: bool = True, 6593 tokens: t.Optional[t.Collection[TokenType]] = None, 6594 ) -> t.Optional[exp.Expression]: 6595 expression = self._parse_identifier() 6596 if 
not expression and ( 6597 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6598 ): 6599 quoted = self._prev.token_type == TokenType.STRING 6600 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6601 6602 return expression 6603 6604 def _parse_string(self) -> t.Optional[exp.Expression]: 6605 if self._match_set(self.STRING_PARSERS): 6606 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6607 return self._parse_placeholder() 6608 6609 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6610 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6611 6612 def _parse_number(self) -> t.Optional[exp.Expression]: 6613 if self._match_set(self.NUMERIC_PARSERS): 6614 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6615 return self._parse_placeholder() 6616 6617 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6618 if self._match(TokenType.IDENTIFIER): 6619 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6620 return self._parse_placeholder() 6621 6622 def _parse_var( 6623 self, 6624 any_token: bool = False, 6625 tokens: t.Optional[t.Collection[TokenType]] = None, 6626 upper: bool = False, 6627 ) -> t.Optional[exp.Expression]: 6628 if ( 6629 (any_token and self._advance_any()) 6630 or self._match(TokenType.VAR) 6631 or (self._match_set(tokens) if tokens else False) 6632 ): 6633 return self.expression( 6634 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6635 ) 6636 return self._parse_placeholder() 6637 6638 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6639 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6640 self._advance() 6641 return self._prev 6642 return None 6643 6644 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6645 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6646 6647 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6648 return self._parse_primary() or self._parse_var(any_token=True) 6649 6650 def _parse_null(self) -> t.Optional[exp.Expression]: 6651 if self._match_set(self.NULL_TOKENS): 6652 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6653 return self._parse_placeholder() 6654 6655 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6656 if self._match(TokenType.TRUE): 6657 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6658 if self._match(TokenType.FALSE): 6659 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6660 return self._parse_placeholder() 6661 6662 def _parse_star(self) -> t.Optional[exp.Expression]: 6663 if self._match(TokenType.STAR): 6664 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6665 return self._parse_placeholder() 6666 6667 def _parse_parameter(self) -> exp.Parameter: 6668 this = self._parse_identifier() or self._parse_primary_or_var() 6669 return self.expression(exp.Parameter, this=this) 6670 6671 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6672 if self._match_set(self.PLACEHOLDER_PARSERS): 6673 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6674 if placeholder: 6675 return placeholder 6676 self._advance(-1) 6677 return None 6678 6679 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6680 if not self._match_texts(keywords): 6681 return None 6682 if self._match(TokenType.L_PAREN, 
advance=False): 6683 return self._parse_wrapped_csv(self._parse_expression) 6684 6685 expression = self._parse_expression() 6686 return [expression] if expression else None 6687 6688 def _parse_csv( 6689 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6690 ) -> t.List[exp.Expression]: 6691 parse_result = parse_method() 6692 items = [parse_result] if parse_result is not None else [] 6693 6694 while self._match(sep): 6695 self._add_comments(parse_result) 6696 parse_result = parse_method() 6697 if parse_result is not None: 6698 items.append(parse_result) 6699 6700 return items 6701 6702 def _parse_tokens( 6703 self, parse_method: t.Callable, expressions: t.Dict 6704 ) -> t.Optional[exp.Expression]: 6705 this = parse_method() 6706 6707 while self._match_set(expressions): 6708 this = self.expression( 6709 expressions[self._prev.token_type], 6710 this=this, 6711 comments=self._prev_comments, 6712 expression=parse_method(), 6713 ) 6714 6715 return this 6716 6717 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6718 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6719 6720 def _parse_wrapped_csv( 6721 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6722 ) -> t.List[exp.Expression]: 6723 return self._parse_wrapped( 6724 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6725 ) 6726 6727 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6728 wrapped = self._match(TokenType.L_PAREN) 6729 if not wrapped and not optional: 6730 self.raise_error("Expecting (") 6731 parse_result = parse_method() 6732 if wrapped: 6733 self._match_r_paren() 6734 return parse_result 6735 6736 def _parse_expressions(self) -> t.List[exp.Expression]: 6737 return self._parse_csv(self._parse_expression) 6738 6739 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6740 return self._parse_select() or self._parse_set_operations( 6741 self._parse_alias(self._parse_assignment(), explicit=True) 6742 if alias 6743 else self._parse_assignment() 6744 ) 6745 6746 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6747 return self._parse_query_modifiers( 6748 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6749 ) 6750 6751 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6752 this = None 6753 if self._match_texts(self.TRANSACTION_KIND): 6754 this = self._prev.text 6755 6756 self._match_texts(("TRANSACTION", "WORK")) 6757 6758 modes = [] 6759 while True: 6760 mode = [] 6761 while self._match(TokenType.VAR): 6762 mode.append(self._prev.text) 6763 6764 if mode: 6765 modes.append(" ".join(mode)) 6766 if not self._match(TokenType.COMMA): 6767 break 6768 6769 return self.expression(exp.Transaction, this=this, modes=modes) 6770 6771 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6772 chain = None 6773 savepoint = None 6774 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6775 6776 self._match_texts(("TRANSACTION", "WORK")) 6777 6778 if self._match_text_seq("TO"): 6779 self._match_text_seq("SAVEPOINT") 6780 savepoint = self._parse_id_var() 6781 6782 if self._match(TokenType.AND): 6783 chain = not self._match_text_seq("NO") 6784 self._match_text_seq("CHAIN") 6785 6786 if is_rollback: 6787 return self.expression(exp.Rollback, savepoint=savepoint) 6788 6789 return self.expression(exp.Commit, chain=chain) 6790 6791 def _parse_refresh(self) -> exp.Refresh: 6792 
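        # Covers both REFRESH TABLE <table> and REFRESH '<uri>': the TABLE keyword is
        # optional and the target may be either a string literal or a table reference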
self._match(TokenType.TABLE) 6793 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6794 6795 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6796 if not self._match_text_seq("ADD"): 6797 return None 6798 6799 self._match(TokenType.COLUMN) 6800 exists_column = self._parse_exists(not_=True) 6801 expression = self._parse_field_def() 6802 6803 if expression: 6804 expression.set("exists", exists_column) 6805 6806 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6807 if self._match_texts(("FIRST", "AFTER")): 6808 position = self._prev.text 6809 column_position = self.expression( 6810 exp.ColumnPosition, this=self._parse_column(), position=position 6811 ) 6812 expression.set("position", column_position) 6813 6814 return expression 6815 6816 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6817 drop = self._match(TokenType.DROP) and self._parse_drop() 6818 if drop and not isinstance(drop, exp.Command): 6819 drop.set("kind", drop.args.get("kind", "COLUMN")) 6820 return drop 6821 6822 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6823 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6824 return self.expression( 6825 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6826 ) 6827 6828 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6829 index = self._index - 1 6830 6831 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6832 return self._parse_csv( 6833 lambda: self.expression( 6834 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6835 ) 6836 ) 6837 6838 self._retreat(index) 6839 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6840 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6841 6842 if self._match_text_seq("ADD", "COLUMNS"): 6843 schema = self._parse_schema() 6844 if schema: 6845 return [schema] 6846 return [] 6847 6848 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6849 6850 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6851 if self._match_texts(self.ALTER_ALTER_PARSERS): 6852 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6853 6854 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6855 # keyword after ALTER we default to parsing this statement 6856 self._match(TokenType.COLUMN) 6857 column = self._parse_field(any_token=True) 6858 6859 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6860 return self.expression(exp.AlterColumn, this=column, drop=True) 6861 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6862 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6863 if self._match(TokenType.COMMENT): 6864 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6865 if self._match_text_seq("DROP", "NOT", "NULL"): 6866 return self.expression( 6867 exp.AlterColumn, 6868 this=column, 6869 drop=True, 6870 allow_null=True, 6871 ) 6872 if self._match_text_seq("SET", "NOT", "NULL"): 6873 return self.expression( 6874 exp.AlterColumn, 6875 this=column, 6876 allow_null=False, 6877 ) 6878 self._match_text_seq("SET", "DATA") 6879 self._match_text_seq("TYPE") 6880 return self.expression( 6881 exp.AlterColumn, 6882 this=column, 6883 dtype=self._parse_types(), 6884 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6885 
using=self._match(TokenType.USING) and self._parse_assignment(), 6886 ) 6887 6888 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6889 if self._match_texts(("ALL", "EVEN", "AUTO")): 6890 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6891 6892 self._match_text_seq("KEY", "DISTKEY") 6893 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6894 6895 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6896 if compound: 6897 self._match_text_seq("SORTKEY") 6898 6899 if self._match(TokenType.L_PAREN, advance=False): 6900 return self.expression( 6901 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6902 ) 6903 6904 self._match_texts(("AUTO", "NONE")) 6905 return self.expression( 6906 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6907 ) 6908 6909 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6910 index = self._index - 1 6911 6912 partition_exists = self._parse_exists() 6913 if self._match(TokenType.PARTITION, advance=False): 6914 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6915 6916 self._retreat(index) 6917 return self._parse_csv(self._parse_drop_column) 6918 6919 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6920 if self._match(TokenType.COLUMN): 6921 exists = self._parse_exists() 6922 old_column = self._parse_column() 6923 to = self._match_text_seq("TO") 6924 new_column = self._parse_column() 6925 6926 if old_column is None or to is None or new_column is None: 6927 return None 6928 6929 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6930 6931 self._match_text_seq("TO") 6932 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6933 6934 def _parse_alter_table_set(self) -> exp.AlterSet: 6935 alter_set = self.expression(exp.AlterSet) 6936 6937 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6938 "TABLE", "PROPERTIES" 6939 ): 6940 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6941 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6942 alter_set.set("expressions", [self._parse_assignment()]) 6943 elif self._match_texts(("LOGGED", "UNLOGGED")): 6944 alter_set.set("option", exp.var(self._prev.text.upper())) 6945 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6946 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6947 elif self._match_text_seq("LOCATION"): 6948 alter_set.set("location", self._parse_field()) 6949 elif self._match_text_seq("ACCESS", "METHOD"): 6950 alter_set.set("access_method", self._parse_field()) 6951 elif self._match_text_seq("TABLESPACE"): 6952 alter_set.set("tablespace", self._parse_field()) 6953 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6954 alter_set.set("file_format", [self._parse_field()]) 6955 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6956 alter_set.set("file_format", self._parse_wrapped_options()) 6957 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6958 alter_set.set("copy_options", self._parse_wrapped_options()) 6959 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6960 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6961 else: 6962 if self._match_text_seq("SERDE"): 6963 alter_set.set("serde", self._parse_field()) 6964 6965 alter_set.set("expressions", 
[self._parse_properties()]) 6966 6967 return alter_set 6968 6969 def _parse_alter(self) -> exp.Alter | exp.Command: 6970 start = self._prev 6971 6972 alter_token = self._match_set(self.ALTERABLES) and self._prev 6973 if not alter_token: 6974 return self._parse_as_command(start) 6975 6976 exists = self._parse_exists() 6977 only = self._match_text_seq("ONLY") 6978 this = self._parse_table(schema=True) 6979 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6980 6981 if self._next: 6982 self._advance() 6983 6984 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6985 if parser: 6986 actions = ensure_list(parser(self)) 6987 not_valid = self._match_text_seq("NOT", "VALID") 6988 options = self._parse_csv(self._parse_property) 6989 6990 if not self._curr and actions: 6991 return self.expression( 6992 exp.Alter, 6993 this=this, 6994 kind=alter_token.text.upper(), 6995 exists=exists, 6996 actions=actions, 6997 only=only, 6998 options=options, 6999 cluster=cluster, 7000 not_valid=not_valid, 7001 ) 7002 7003 return self._parse_as_command(start) 7004 7005 def _parse_merge(self) -> exp.Merge: 7006 self._match(TokenType.INTO) 7007 target = self._parse_table() 7008 7009 if target and self._match(TokenType.ALIAS, advance=False): 7010 target.set("alias", self._parse_table_alias()) 7011 7012 self._match(TokenType.USING) 7013 using = self._parse_table() 7014 7015 self._match(TokenType.ON) 7016 on = self._parse_assignment() 7017 7018 return self.expression( 7019 exp.Merge, 7020 this=target, 7021 using=using, 7022 on=on, 7023 whens=self._parse_when_matched(), 7024 returning=self._parse_returning(), 7025 ) 7026 7027 def _parse_when_matched(self) -> exp.Whens: 7028 whens = [] 7029 7030 while self._match(TokenType.WHEN): 7031 matched = not self._match(TokenType.NOT) 7032 self._match_text_seq("MATCHED") 7033 source = ( 7034 False 7035 if self._match_text_seq("BY", "TARGET") 7036 else self._match_text_seq("BY", "SOURCE") 7037 ) 7038 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7039 7040 self._match(TokenType.THEN) 7041 7042 if self._match(TokenType.INSERT): 7043 this = self._parse_star() 7044 if this: 7045 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7046 else: 7047 then = self.expression( 7048 exp.Insert, 7049 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7050 expression=self._match_text_seq("VALUES") and self._parse_value(), 7051 ) 7052 elif self._match(TokenType.UPDATE): 7053 expressions = self._parse_star() 7054 if expressions: 7055 then = self.expression(exp.Update, expressions=expressions) 7056 else: 7057 then = self.expression( 7058 exp.Update, 7059 expressions=self._match(TokenType.SET) 7060 and self._parse_csv(self._parse_equality), 7061 ) 7062 elif self._match(TokenType.DELETE): 7063 then = self.expression(exp.Var, this=self._prev.text) 7064 else: 7065 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7066 7067 whens.append( 7068 self.expression( 7069 exp.When, 7070 matched=matched, 7071 source=source, 7072 condition=condition, 7073 then=then, 7074 ) 7075 ) 7076 return self.expression(exp.Whens, expressions=whens) 7077 7078 def _parse_show(self) -> t.Optional[exp.Expression]: 7079 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7080 if parser: 7081 return parser(self) 7082 return self._parse_as_command(self._prev) 7083 7084 def _parse_set_item_assignment( 7085 self, kind: t.Optional[str] = None 7086 ) -> t.Optional[exp.Expression]: 7087 
index = self._index 7088 7089 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7090 return self._parse_set_transaction(global_=kind == "GLOBAL") 7091 7092 left = self._parse_primary() or self._parse_column() 7093 assignment_delimiter = self._match_texts(("=", "TO")) 7094 7095 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7096 self._retreat(index) 7097 return None 7098 7099 right = self._parse_statement() or self._parse_id_var() 7100 if isinstance(right, (exp.Column, exp.Identifier)): 7101 right = exp.var(right.name) 7102 7103 this = self.expression(exp.EQ, this=left, expression=right) 7104 return self.expression(exp.SetItem, this=this, kind=kind) 7105 7106 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7107 self._match_text_seq("TRANSACTION") 7108 characteristics = self._parse_csv( 7109 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7110 ) 7111 return self.expression( 7112 exp.SetItem, 7113 expressions=characteristics, 7114 kind="TRANSACTION", 7115 **{"global": global_}, # type: ignore 7116 ) 7117 7118 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7119 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7120 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7121 7122 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7123 index = self._index 7124 set_ = self.expression( 7125 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7126 ) 7127 7128 if self._curr: 7129 self._retreat(index) 7130 return self._parse_as_command(self._prev) 7131 7132 return set_ 7133 7134 def _parse_var_from_options( 7135 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7136 ) -> t.Optional[exp.Var]: 7137 start = self._curr 7138 if not start: 7139 return None 7140 7141 option = start.text.upper() 7142 continuations = options.get(option) 7143 7144 index = self._index 7145 self._advance() 7146 for keywords in continuations or []: 7147 if isinstance(keywords, str): 7148 keywords = (keywords,) 7149 7150 if self._match_text_seq(*keywords): 7151 option = f"{option} {' '.join(keywords)}" 7152 break 7153 else: 7154 if continuations or continuations is None: 7155 if raise_unmatched: 7156 self.raise_error(f"Unknown option {option}") 7157 7158 self._retreat(index) 7159 return None 7160 7161 return exp.var(option) 7162 7163 def _parse_as_command(self, start: Token) -> exp.Command: 7164 while self._curr: 7165 self._advance() 7166 text = self._find_sql(start, self._prev) 7167 size = len(start.text) 7168 self._warn_unsupported() 7169 return exp.Command(this=text[:size], expression=text[size:]) 7170 7171 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7172 settings = [] 7173 7174 self._match_l_paren() 7175 kind = self._parse_id_var() 7176 7177 if self._match(TokenType.L_PAREN): 7178 while True: 7179 key = self._parse_id_var() 7180 value = self._parse_primary() 7181 if not key and value is None: 7182 break 7183 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7184 self._match(TokenType.R_PAREN) 7185 7186 self._match_r_paren() 7187 7188 return self.expression( 7189 exp.DictProperty, 7190 this=this, 7191 kind=kind.this if kind else None, 7192 settings=settings, 7193 ) 7194 7195 def _parse_dict_range(self, this: str) -> exp.DictRange: 7196 self._match_l_paren() 7197 has_min = self._match_text_seq("MIN") 7198 if has_min: 7199 min = self._parse_var() or 
self._parse_primary() 7200 self._match_text_seq("MAX") 7201 max = self._parse_var() or self._parse_primary() 7202 else: 7203 max = self._parse_var() or self._parse_primary() 7204 min = exp.Literal.number(0) 7205 self._match_r_paren() 7206 return self.expression(exp.DictRange, this=this, min=min, max=max) 7207 7208 def _parse_comprehension( 7209 self, this: t.Optional[exp.Expression] 7210 ) -> t.Optional[exp.Comprehension]: 7211 index = self._index 7212 expression = self._parse_column() 7213 if not self._match(TokenType.IN): 7214 self._retreat(index - 1) 7215 return None 7216 iterator = self._parse_column() 7217 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7218 return self.expression( 7219 exp.Comprehension, 7220 this=this, 7221 expression=expression, 7222 iterator=iterator, 7223 condition=condition, 7224 ) 7225 7226 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7227 if self._match(TokenType.HEREDOC_STRING): 7228 return self.expression(exp.Heredoc, this=self._prev.text) 7229 7230 if not self._match_text_seq("$"): 7231 return None 7232 7233 tags = ["$"] 7234 tag_text = None 7235 7236 if self._is_connected(): 7237 self._advance() 7238 tags.append(self._prev.text.upper()) 7239 else: 7240 self.raise_error("No closing $ found") 7241 7242 if tags[-1] != "$": 7243 if self._is_connected() and self._match_text_seq("$"): 7244 tag_text = tags[-1] 7245 tags.append("$") 7246 else: 7247 self.raise_error("No closing $ found") 7248 7249 heredoc_start = self._curr 7250 7251 while self._curr: 7252 if self._match_text_seq(*tags, advance=False): 7253 this = self._find_sql(heredoc_start, self._prev) 7254 self._advance(len(tags)) 7255 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7256 7257 self._advance() 7258 7259 self.raise_error(f"No closing {''.join(tags)} found") 7260 return None 7261 7262 def _find_parser( 7263 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7264 ) -> t.Optional[t.Callable]: 7265 if not self._curr: 7266 return None 7267 7268 index = self._index 7269 this = [] 7270 while True: 7271 # The current token might be multiple words 7272 curr = self._curr.text.upper() 7273 key = curr.split(" ") 7274 this.append(curr) 7275 7276 self._advance() 7277 result, trie = in_trie(trie, key) 7278 if result == TrieResult.FAILED: 7279 break 7280 7281 if result == TrieResult.EXISTS: 7282 subparser = parsers[" ".join(this)] 7283 return subparser 7284 7285 self._retreat(index) 7286 return None 7287 7288 def _match(self, token_type, advance=True, expression=None): 7289 if not self._curr: 7290 return None 7291 7292 if self._curr.token_type == token_type: 7293 if advance: 7294 self._advance() 7295 self._add_comments(expression) 7296 return True 7297 7298 return None 7299 7300 def _match_set(self, types, advance=True): 7301 if not self._curr: 7302 return None 7303 7304 if self._curr.token_type in types: 7305 if advance: 7306 self._advance() 7307 return True 7308 7309 return None 7310 7311 def _match_pair(self, token_type_a, token_type_b, advance=True): 7312 if not self._curr or not self._next: 7313 return None 7314 7315 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7316 if advance: 7317 self._advance(2) 7318 return True 7319 7320 return None 7321 7322 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7323 if not self._match(TokenType.L_PAREN, expression=expression): 7324 self.raise_error("Expecting (") 7325 7326 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 
7327 if not self._match(TokenType.R_PAREN, expression=expression): 7328 self.raise_error("Expecting )") 7329 7330 def _match_texts(self, texts, advance=True): 7331 if ( 7332 self._curr 7333 and self._curr.token_type != TokenType.STRING 7334 and self._curr.text.upper() in texts 7335 ): 7336 if advance: 7337 self._advance() 7338 return True 7339 return None 7340 7341 def _match_text_seq(self, *texts, advance=True): 7342 index = self._index 7343 for text in texts: 7344 if ( 7345 self._curr 7346 and self._curr.token_type != TokenType.STRING 7347 and self._curr.text.upper() == text 7348 ): 7349 self._advance() 7350 else: 7351 self._retreat(index) 7352 return None 7353 7354 if not advance: 7355 self._retreat(index) 7356 7357 return True 7358 7359 def _replace_lambda( 7360 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7361 ) -> t.Optional[exp.Expression]: 7362 if not node: 7363 return node 7364 7365 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7366 7367 for column in node.find_all(exp.Column): 7368 typ = lambda_types.get(column.parts[0].name) 7369 if typ is not None: 7370 dot_or_id = column.to_dot() if column.table else column.this 7371 7372 if typ: 7373 dot_or_id = self.expression( 7374 exp.Cast, 7375 this=dot_or_id, 7376 to=typ, 7377 ) 7378 7379 parent = column.parent 7380 7381 while isinstance(parent, exp.Dot): 7382 if not isinstance(parent.parent, exp.Dot): 7383 parent.replace(dot_or_id) 7384 break 7385 parent = parent.parent 7386 else: 7387 if column is node: 7388 node = dot_or_id 7389 else: 7390 column.replace(dot_or_id) 7391 return node 7392 7393 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7394 start = self._prev 7395 7396 # Not to be confused with TRUNCATE(number, decimals) function call 7397 if self._match(TokenType.L_PAREN): 7398 self._retreat(self._index - 2) 7399 return self._parse_function() 7400 7401 # Clickhouse supports TRUNCATE DATABASE as well 7402 is_database = self._match(TokenType.DATABASE) 7403 7404 self._match(TokenType.TABLE) 7405 7406 exists = self._parse_exists(not_=False) 7407 7408 expressions = self._parse_csv( 7409 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7410 ) 7411 7412 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7413 7414 if self._match_text_seq("RESTART", "IDENTITY"): 7415 identity = "RESTART" 7416 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7417 identity = "CONTINUE" 7418 else: 7419 identity = None 7420 7421 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7422 option = self._prev.text 7423 else: 7424 option = None 7425 7426 partition = self._parse_partition() 7427 7428 # Fallback case 7429 if self._curr: 7430 return self._parse_as_command(start) 7431 7432 return self.expression( 7433 exp.TruncateTable, 7434 expressions=expressions, 7435 is_database=is_database, 7436 exists=exists, 7437 cluster=cluster, 7438 identity=identity, 7439 option=option, 7440 partition=partition, 7441 ) 7442 7443 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7444 this = self._parse_ordered(self._parse_opclass) 7445 7446 if not self._match(TokenType.WITH): 7447 return this 7448 7449 op = self._parse_var(any_token=True) 7450 7451 return self.expression(exp.WithOperator, this=this, op=op) 7452 7453 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7454 self._match(TokenType.EQ) 7455 self._match(TokenType.L_PAREN) 7456 7457 opts: t.List[t.Optional[exp.Expression]] = [] 7458 
while self._curr and not self._match(TokenType.R_PAREN): 7459 if self._match_text_seq("FORMAT_NAME", "="): 7460 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7461 # so we parse it separately to use _parse_field() 7462 prop = self.expression( 7463 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7464 ) 7465 opts.append(prop) 7466 else: 7467 opts.append(self._parse_property()) 7468 7469 self._match(TokenType.COMMA) 7470 7471 return opts 7472 7473 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7474 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7475 7476 options = [] 7477 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7478 option = self._parse_var(any_token=True) 7479 prev = self._prev.text.upper() 7480 7481 # Different dialects might separate options and values by white space, "=" and "AS" 7482 self._match(TokenType.EQ) 7483 self._match(TokenType.ALIAS) 7484 7485 param = self.expression(exp.CopyParameter, this=option) 7486 7487 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7488 TokenType.L_PAREN, advance=False 7489 ): 7490 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7491 param.set("expressions", self._parse_wrapped_options()) 7492 elif prev == "FILE_FORMAT": 7493 # T-SQL's external file format case 7494 param.set("expression", self._parse_field()) 7495 else: 7496 param.set("expression", self._parse_unquoted_field()) 7497 7498 options.append(param) 7499 self._match(sep) 7500 7501 return options 7502 7503 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7504 expr = self.expression(exp.Credentials) 7505 7506 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7507 expr.set("storage", self._parse_field()) 7508 if self._match_text_seq("CREDENTIALS"): 7509 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7510 creds = ( 7511 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7512 ) 7513 expr.set("credentials", creds) 7514 if self._match_text_seq("ENCRYPTION"): 7515 expr.set("encryption", self._parse_wrapped_options()) 7516 if self._match_text_seq("IAM_ROLE"): 7517 expr.set("iam_role", self._parse_field()) 7518 if self._match_text_seq("REGION"): 7519 expr.set("region", self._parse_field()) 7520 7521 return expr 7522 7523 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7524 return self._parse_field() 7525 7526 def _parse_copy(self) -> exp.Copy | exp.Command: 7527 start = self._prev 7528 7529 self._match(TokenType.INTO) 7530 7531 this = ( 7532 self._parse_select(nested=True, parse_subquery_alias=False) 7533 if self._match(TokenType.L_PAREN, advance=False) 7534 else self._parse_table(schema=True) 7535 ) 7536 7537 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7538 7539 files = self._parse_csv(self._parse_file_location) 7540 credentials = self._parse_credentials() 7541 7542 self._match_text_seq("WITH") 7543 7544 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7545 7546 # Fallback case 7547 if self._curr: 7548 return self._parse_as_command(start) 7549 7550 return self.expression( 7551 exp.Copy, 7552 this=this, 7553 kind=kind, 7554 credentials=credentials, 7555 files=files, 7556 params=params, 7557 ) 7558 7559 def _parse_normalize(self) -> exp.Normalize: 7560 return self.expression( 7561 exp.Normalize, 7562 this=self._parse_bitwise(), 7563 form=self._match(TokenType.COMMA) and self._parse_var(), 7564 ) 7565 7566 def _parse_star_ops(self) 
-> t.Optional[exp.Expression]: 7567 if self._match_text_seq("COLUMNS", "(", advance=False): 7568 this = self._parse_function() 7569 if isinstance(this, exp.Columns): 7570 this.set("unpack", True) 7571 return this 7572 7573 return self.expression( 7574 exp.Star, 7575 **{ # type: ignore 7576 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7577 "replace": self._parse_star_op("REPLACE"), 7578 "rename": self._parse_star_op("RENAME"), 7579 }, 7580 ) 7581 7582 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7583 privilege_parts = [] 7584 7585 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7586 # (end of privilege list) or L_PAREN (start of column list) are met 7587 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7588 privilege_parts.append(self._curr.text.upper()) 7589 self._advance() 7590 7591 this = exp.var(" ".join(privilege_parts)) 7592 expressions = ( 7593 self._parse_wrapped_csv(self._parse_column) 7594 if self._match(TokenType.L_PAREN, advance=False) 7595 else None 7596 ) 7597 7598 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7599 7600 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7601 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7602 principal = self._parse_id_var() 7603 7604 if not principal: 7605 return None 7606 7607 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7608 7609 def _parse_grant(self) -> exp.Grant | exp.Command: 7610 start = self._prev 7611 7612 privileges = self._parse_csv(self._parse_grant_privilege) 7613 7614 self._match(TokenType.ON) 7615 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7616 7617 # Attempt to parse the securable e.g. MySQL allows names 7618 # such as "foo.*", "*.*" which are not easily parseable yet 7619 securable = self._try_parse(self._parse_table_parts) 7620 7621 if not securable or not self._match_text_seq("TO"): 7622 return self._parse_as_command(start) 7623 7624 principals = self._parse_csv(self._parse_grant_principal) 7625 7626 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7627 7628 if self._curr: 7629 return self._parse_as_command(start) 7630 7631 return self.expression( 7632 exp.Grant, 7633 privileges=privileges, 7634 kind=kind, 7635 securable=securable, 7636 principals=principals, 7637 grant_option=grant_option, 7638 ) 7639 7640 def _parse_overlay(self) -> exp.Overlay: 7641 return self.expression( 7642 exp.Overlay, 7643 **{ # type: ignore 7644 "this": self._parse_bitwise(), 7645 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7646 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7647 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7648 }, 7649 )
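For orientation, a minimal usage sketch (not part of the module source) of how this parser is typically driven, either through the public sqlglot entry point or directly against a token stream; the query text is illustrative only:

# Minimal sketch assuming only public sqlglot APIs: the high-level entry
# point and the equivalent low-level Tokenizer -> Parser pipeline.
import sqlglot
from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

ast = sqlglot.parse_one("SELECT a FROM t")
assert isinstance(ast, exp.Select)

# Parser.parse consumes the raw token list produced by the Tokenizer.
statements = Parser().parse(Tokenizer().tokenize("SELECT a FROM t"))
assert isinstance(statements[0], exp.Select)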
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
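A hedged sketch of build_var_map in isolation: an alternating key/value argument list is split into parallel arrays (a single star argument would instead produce a StarMap); the literal values are arbitrary:

# Sketch: build_var_map pairs up alternating key/value arguments.
from sqlglot import exp
from sqlglot.parser import build_var_map

node = build_var_map(
    [exp.Literal.string("a"), exp.Literal.number(1), exp.Literal.string("b"), exp.Literal.number(2)]
)
assert isinstance(node, exp.VarMap)
assert [k.this for k in node.args["keys"].expressions] == ["a", "b"]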
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range
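binary_range_parser is how entries such as TokenType.GLOB in RANGE_PARSERS (further down) are populated; a small sketch of the resulting parse, assuming the default dialect:

# Sketch: GLOB is wired through binary_range_parser(exp.Glob), so the
# right-hand side is parsed with _parse_bitwise and wrapped in exp.Glob.
import sqlglot
from sqlglot import exp

node = sqlglot.parse_one("x GLOB 'a*'")
assert isinstance(node, exp.Glob)
assert node.this.name == "x"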
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
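A sketch of both behaviors, assuming the default dialect (base-first argument order, no LN fallback):

# Sketch: two-argument LOG keeps base-first order where LOG_BASE_FIRST is
# true; one-argument LOG becomes Ln only in dialects with LOG_DEFAULTS_TO_LN.
import sqlglot
from sqlglot import exp

two_arg = sqlglot.parse_one("LOG(2, 8)")
assert isinstance(two_arg, exp.Log)

one_arg = sqlglot.parse_one("LOG(8)")
assert isinstance(one_arg, (exp.Log, exp.Ln))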
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
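A sketch under the default dialect, where JSON_EXTRACT is registered through this builder and the path string is converted by dialect.to_json_path:

# Sketch: the second argument is parsed into a structured JSON path node
# rather than kept as a plain string literal.
import sqlglot
from sqlglot import exp

node = sqlglot.parse_one("JSON_EXTRACT(payload, '$.user.id')")
assert isinstance(node, exp.JSONExtract)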
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
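The wrapping is visible when the tree is rendered back to SQL; a small sketch with the default dialect:

# Sketch: the Paren wrapper preserves precedence on round-trip.
import sqlglot

assert sqlglot.parse_one("MOD(a + 1, 7)").sql() == "(a + 1) % 7"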
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp
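A direct-call sketch; the base Dialect is assumed not to distinguish ARRAY[...] from ARRAY(...), so only the expressions are set:

# Sketch: the builder wraps args in the given constructor class; bracket
# notation is only recorded for dialects with distinct array constructors.
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

node = build_array_constructor(exp.Array, [exp.Literal.number(1)], TokenType.L_BRACKET, Dialect())
assert isinstance(node, exp.Array)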
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
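A sketch of the two-argument path, with a caller-chosen default source timezone (the column name is illustrative):

# Sketch: with (target_tz, timestamp) only, the supplied default source
# timezone is injected as a string literal; three args go through
# ConvertTimezone.from_arg_list unchanged.
from sqlglot import exp
from sqlglot.parser import build_convert_timezone

node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("created_at")],
    default_source_tz="UTC",
)
assert node.args["source_tz"].this == "UTC"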
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 217 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 218 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 219 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 220 "LIKE": build_like, 221 "LOG": build_logarithm, 222 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 223 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 224 "LOWER": build_lower, 225 "LPAD": lambda args: build_pad(args), 226 "LEFTPAD": lambda args: build_pad(args), 227 "LTRIM": lambda args: build_trim(args), 228 "MOD": build_mod, 229 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 230 "RPAD": lambda args: build_pad(args, is_left=False), 231 "RTRIM": lambda args: build_trim(args, is_left=False), 232 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 233 if len(args) != 2 234 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 235 "TIME_TO_TIME_STR": lambda args: exp.Cast( 236 this=seq_get(args, 0), 237 to=exp.DataType(this=exp.DataType.Type.TEXT), 238 ), 239 "TO_HEX": build_hex, 240 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 241 this=exp.Cast( 242 this=seq_get(args, 0), 243 
to=exp.DataType(this=exp.DataType.Type.TEXT), 244 ), 245 start=exp.Literal.number(1), 246 length=exp.Literal.number(10), 247 ), 248 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 249 "UPPER": build_upper, 250 "VAR_MAP": build_var_map, 251 } 252 253 NO_PAREN_FUNCTIONS = { 254 TokenType.CURRENT_DATE: exp.CurrentDate, 255 TokenType.CURRENT_DATETIME: exp.CurrentDate, 256 TokenType.CURRENT_TIME: exp.CurrentTime, 257 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 258 TokenType.CURRENT_USER: exp.CurrentUser, 259 } 260 261 STRUCT_TYPE_TOKENS = { 262 TokenType.NESTED, 263 TokenType.OBJECT, 264 TokenType.STRUCT, 265 TokenType.UNION, 266 } 267 268 NESTED_TYPE_TOKENS = { 269 TokenType.ARRAY, 270 TokenType.LIST, 271 TokenType.LOWCARDINALITY, 272 TokenType.MAP, 273 TokenType.NULLABLE, 274 TokenType.RANGE, 275 *STRUCT_TYPE_TOKENS, 276 } 277 278 ENUM_TYPE_TOKENS = { 279 TokenType.ENUM, 280 TokenType.ENUM8, 281 TokenType.ENUM16, 282 } 283 284 AGGREGATE_TYPE_TOKENS = { 285 TokenType.AGGREGATEFUNCTION, 286 TokenType.SIMPLEAGGREGATEFUNCTION, 287 } 288 289 TYPE_TOKENS = { 290 TokenType.BIT, 291 TokenType.BOOLEAN, 292 TokenType.TINYINT, 293 TokenType.UTINYINT, 294 TokenType.SMALLINT, 295 TokenType.USMALLINT, 296 TokenType.INT, 297 TokenType.UINT, 298 TokenType.BIGINT, 299 TokenType.UBIGINT, 300 TokenType.INT128, 301 TokenType.UINT128, 302 TokenType.INT256, 303 TokenType.UINT256, 304 TokenType.MEDIUMINT, 305 TokenType.UMEDIUMINT, 306 TokenType.FIXEDSTRING, 307 TokenType.FLOAT, 308 TokenType.DOUBLE, 309 TokenType.CHAR, 310 TokenType.NCHAR, 311 TokenType.VARCHAR, 312 TokenType.NVARCHAR, 313 TokenType.BPCHAR, 314 TokenType.TEXT, 315 TokenType.MEDIUMTEXT, 316 TokenType.LONGTEXT, 317 TokenType.MEDIUMBLOB, 318 TokenType.LONGBLOB, 319 TokenType.BINARY, 320 TokenType.VARBINARY, 321 TokenType.JSON, 322 TokenType.JSONB, 323 TokenType.INTERVAL, 324 TokenType.TINYBLOB, 325 TokenType.TINYTEXT, 326 TokenType.TIME, 327 TokenType.TIMETZ, 328 TokenType.TIMESTAMP, 329 TokenType.TIMESTAMP_S, 330 TokenType.TIMESTAMP_MS, 331 TokenType.TIMESTAMP_NS, 332 TokenType.TIMESTAMPTZ, 333 TokenType.TIMESTAMPLTZ, 334 TokenType.TIMESTAMPNTZ, 335 TokenType.DATETIME, 336 TokenType.DATETIME2, 337 TokenType.DATETIME64, 338 TokenType.SMALLDATETIME, 339 TokenType.DATE, 340 TokenType.DATE32, 341 TokenType.INT4RANGE, 342 TokenType.INT4MULTIRANGE, 343 TokenType.INT8RANGE, 344 TokenType.INT8MULTIRANGE, 345 TokenType.NUMRANGE, 346 TokenType.NUMMULTIRANGE, 347 TokenType.TSRANGE, 348 TokenType.TSMULTIRANGE, 349 TokenType.TSTZRANGE, 350 TokenType.TSTZMULTIRANGE, 351 TokenType.DATERANGE, 352 TokenType.DATEMULTIRANGE, 353 TokenType.DECIMAL, 354 TokenType.DECIMAL32, 355 TokenType.DECIMAL64, 356 TokenType.DECIMAL128, 357 TokenType.DECIMAL256, 358 TokenType.UDECIMAL, 359 TokenType.BIGDECIMAL, 360 TokenType.UUID, 361 TokenType.GEOGRAPHY, 362 TokenType.GEOMETRY, 363 TokenType.POINT, 364 TokenType.RING, 365 TokenType.LINESTRING, 366 TokenType.MULTILINESTRING, 367 TokenType.POLYGON, 368 TokenType.MULTIPOLYGON, 369 TokenType.HLLSKETCH, 370 TokenType.HSTORE, 371 TokenType.PSEUDO_TYPE, 372 TokenType.SUPER, 373 TokenType.SERIAL, 374 TokenType.SMALLSERIAL, 375 TokenType.BIGSERIAL, 376 TokenType.XML, 377 TokenType.YEAR, 378 TokenType.UNIQUEIDENTIFIER, 379 TokenType.USERDEFINED, 380 TokenType.MONEY, 381 TokenType.SMALLMONEY, 382 TokenType.ROWVERSION, 383 TokenType.IMAGE, 384 TokenType.VARIANT, 385 TokenType.VECTOR, 386 TokenType.OBJECT, 387 TokenType.OBJECT_IDENTIFIER, 388 TokenType.INET, 389 TokenType.IPADDRESS, 390 TokenType.IPPREFIX, 
391 TokenType.IPV4, 392 TokenType.IPV6, 393 TokenType.UNKNOWN, 394 TokenType.NULL, 395 TokenType.NAME, 396 TokenType.TDIGEST, 397 *ENUM_TYPE_TOKENS, 398 *NESTED_TYPE_TOKENS, 399 *AGGREGATE_TYPE_TOKENS, 400 } 401 402 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 403 TokenType.BIGINT: TokenType.UBIGINT, 404 TokenType.INT: TokenType.UINT, 405 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 406 TokenType.SMALLINT: TokenType.USMALLINT, 407 TokenType.TINYINT: TokenType.UTINYINT, 408 TokenType.DECIMAL: TokenType.UDECIMAL, 409 } 410 411 SUBQUERY_PREDICATES = { 412 TokenType.ANY: exp.Any, 413 TokenType.ALL: exp.All, 414 TokenType.EXISTS: exp.Exists, 415 TokenType.SOME: exp.Any, 416 } 417 418 RESERVED_TOKENS = { 419 *Tokenizer.SINGLE_TOKENS.values(), 420 TokenType.SELECT, 421 } - {TokenType.IDENTIFIER} 422 423 DB_CREATABLES = { 424 TokenType.DATABASE, 425 TokenType.DICTIONARY, 426 TokenType.MODEL, 427 TokenType.SCHEMA, 428 TokenType.SEQUENCE, 429 TokenType.STORAGE_INTEGRATION, 430 TokenType.TABLE, 431 TokenType.TAG, 432 TokenType.VIEW, 433 TokenType.WAREHOUSE, 434 TokenType.STREAMLIT, 435 TokenType.SINK, 436 TokenType.SOURCE, 437 } 438 439 CREATABLES = { 440 TokenType.COLUMN, 441 TokenType.CONSTRAINT, 442 TokenType.FOREIGN_KEY, 443 TokenType.FUNCTION, 444 TokenType.INDEX, 445 TokenType.PROCEDURE, 446 *DB_CREATABLES, 447 } 448 449 ALTERABLES = { 450 TokenType.INDEX, 451 TokenType.TABLE, 452 TokenType.VIEW, 453 } 454 455 # Tokens that can represent identifiers 456 ID_VAR_TOKENS = { 457 TokenType.ALL, 458 TokenType.ATTACH, 459 TokenType.VAR, 460 TokenType.ANTI, 461 TokenType.APPLY, 462 TokenType.ASC, 463 TokenType.ASOF, 464 TokenType.AUTO_INCREMENT, 465 TokenType.BEGIN, 466 TokenType.BPCHAR, 467 TokenType.CACHE, 468 TokenType.CASE, 469 TokenType.COLLATE, 470 TokenType.COMMAND, 471 TokenType.COMMENT, 472 TokenType.COMMIT, 473 TokenType.CONSTRAINT, 474 TokenType.COPY, 475 TokenType.CUBE, 476 TokenType.DEFAULT, 477 TokenType.DELETE, 478 TokenType.DESC, 479 TokenType.DESCRIBE, 480 TokenType.DETACH, 481 TokenType.DICTIONARY, 482 TokenType.DIV, 483 TokenType.END, 484 TokenType.EXECUTE, 485 TokenType.ESCAPE, 486 TokenType.FALSE, 487 TokenType.FIRST, 488 TokenType.FILTER, 489 TokenType.FINAL, 490 TokenType.FORMAT, 491 TokenType.FULL, 492 TokenType.IDENTIFIER, 493 TokenType.IS, 494 TokenType.ISNULL, 495 TokenType.INTERVAL, 496 TokenType.KEEP, 497 TokenType.KILL, 498 TokenType.LEFT, 499 TokenType.LOAD, 500 TokenType.MERGE, 501 TokenType.NATURAL, 502 TokenType.NEXT, 503 TokenType.OFFSET, 504 TokenType.OPERATOR, 505 TokenType.ORDINALITY, 506 TokenType.OVERLAPS, 507 TokenType.OVERWRITE, 508 TokenType.PARTITION, 509 TokenType.PERCENT, 510 TokenType.PIVOT, 511 TokenType.PRAGMA, 512 TokenType.RANGE, 513 TokenType.RECURSIVE, 514 TokenType.REFERENCES, 515 TokenType.REFRESH, 516 TokenType.RENAME, 517 TokenType.REPLACE, 518 TokenType.RIGHT, 519 TokenType.ROLLUP, 520 TokenType.ROW, 521 TokenType.ROWS, 522 TokenType.SEMI, 523 TokenType.SET, 524 TokenType.SETTINGS, 525 TokenType.SHOW, 526 TokenType.TEMPORARY, 527 TokenType.TOP, 528 TokenType.TRUE, 529 TokenType.TRUNCATE, 530 TokenType.UNIQUE, 531 TokenType.UNNEST, 532 TokenType.UNPIVOT, 533 TokenType.UPDATE, 534 TokenType.USE, 535 TokenType.VOLATILE, 536 TokenType.WINDOW, 537 *CREATABLES, 538 *SUBQUERY_PREDICATES, 539 *TYPE_TOKENS, 540 *NO_PAREN_FUNCTIONS, 541 } 542 ID_VAR_TOKENS.remove(TokenType.UNION) 543 544 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 545 546 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 547 TokenType.ANTI, 548 TokenType.APPLY, 549 TokenType.ASOF, 550 TokenType.FULL, 551 
TokenType.LEFT, 552 TokenType.LOCK, 553 TokenType.NATURAL, 554 TokenType.OFFSET, 555 TokenType.RIGHT, 556 TokenType.SEMI, 557 TokenType.WINDOW, 558 } 559 560 ALIAS_TOKENS = ID_VAR_TOKENS 561 562 ARRAY_CONSTRUCTORS = { 563 "ARRAY": exp.Array, 564 "LIST": exp.List, 565 } 566 567 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 568 569 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 570 571 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 572 573 FUNC_TOKENS = { 574 TokenType.COLLATE, 575 TokenType.COMMAND, 576 TokenType.CURRENT_DATE, 577 TokenType.CURRENT_DATETIME, 578 TokenType.CURRENT_TIMESTAMP, 579 TokenType.CURRENT_TIME, 580 TokenType.CURRENT_USER, 581 TokenType.FILTER, 582 TokenType.FIRST, 583 TokenType.FORMAT, 584 TokenType.GLOB, 585 TokenType.IDENTIFIER, 586 TokenType.INDEX, 587 TokenType.ISNULL, 588 TokenType.ILIKE, 589 TokenType.INSERT, 590 TokenType.LIKE, 591 TokenType.MERGE, 592 TokenType.NEXT, 593 TokenType.OFFSET, 594 TokenType.PRIMARY_KEY, 595 TokenType.RANGE, 596 TokenType.REPLACE, 597 TokenType.RLIKE, 598 TokenType.ROW, 599 TokenType.UNNEST, 600 TokenType.VAR, 601 TokenType.LEFT, 602 TokenType.RIGHT, 603 TokenType.SEQUENCE, 604 TokenType.DATE, 605 TokenType.DATETIME, 606 TokenType.TABLE, 607 TokenType.TIMESTAMP, 608 TokenType.TIMESTAMPTZ, 609 TokenType.TRUNCATE, 610 TokenType.WINDOW, 611 TokenType.XOR, 612 *TYPE_TOKENS, 613 *SUBQUERY_PREDICATES, 614 } 615 616 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 617 TokenType.AND: exp.And, 618 } 619 620 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 621 TokenType.COLON_EQ: exp.PropertyEQ, 622 } 623 624 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 625 TokenType.OR: exp.Or, 626 } 627 628 EQUALITY = { 629 TokenType.EQ: exp.EQ, 630 TokenType.NEQ: exp.NEQ, 631 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 632 } 633 634 COMPARISON = { 635 TokenType.GT: exp.GT, 636 TokenType.GTE: exp.GTE, 637 TokenType.LT: exp.LT, 638 TokenType.LTE: exp.LTE, 639 } 640 641 BITWISE = { 642 TokenType.AMP: exp.BitwiseAnd, 643 TokenType.CARET: exp.BitwiseXor, 644 TokenType.PIPE: exp.BitwiseOr, 645 } 646 647 TERM = { 648 TokenType.DASH: exp.Sub, 649 TokenType.PLUS: exp.Add, 650 TokenType.MOD: exp.Mod, 651 TokenType.COLLATE: exp.Collate, 652 } 653 654 FACTOR = { 655 TokenType.DIV: exp.IntDiv, 656 TokenType.LR_ARROW: exp.Distance, 657 TokenType.SLASH: exp.Div, 658 TokenType.STAR: exp.Mul, 659 } 660 661 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 662 663 TIMES = { 664 TokenType.TIME, 665 TokenType.TIMETZ, 666 } 667 668 TIMESTAMPS = { 669 TokenType.TIMESTAMP, 670 TokenType.TIMESTAMPTZ, 671 TokenType.TIMESTAMPLTZ, 672 *TIMES, 673 } 674 675 SET_OPERATIONS = { 676 TokenType.UNION, 677 TokenType.INTERSECT, 678 TokenType.EXCEPT, 679 } 680 681 JOIN_METHODS = { 682 TokenType.ASOF, 683 TokenType.NATURAL, 684 TokenType.POSITIONAL, 685 } 686 687 JOIN_SIDES = { 688 TokenType.LEFT, 689 TokenType.RIGHT, 690 TokenType.FULL, 691 } 692 693 JOIN_KINDS = { 694 TokenType.ANTI, 695 TokenType.CROSS, 696 TokenType.INNER, 697 TokenType.OUTER, 698 TokenType.SEMI, 699 TokenType.STRAIGHT_JOIN, 700 } 701 702 JOIN_HINTS: t.Set[str] = set() 703 704 LAMBDAS = { 705 TokenType.ARROW: lambda self, expressions: self.expression( 706 exp.Lambda, 707 this=self._replace_lambda( 708 self._parse_assignment(), 709 expressions, 710 ), 711 expressions=expressions, 712 ), 713 TokenType.FARROW: lambda self, expressions: self.expression( 714 exp.Kwarg, 715 this=exp.var(expressions[0].name), 716 expression=self._parse_assignment(), 717 ), 718 } 
719 720 COLUMN_OPERATORS = { 721 TokenType.DOT: None, 722 TokenType.DCOLON: lambda self, this, to: self.expression( 723 exp.Cast if self.STRICT_CAST else exp.TryCast, 724 this=this, 725 to=to, 726 ), 727 TokenType.ARROW: lambda self, this, path: self.expression( 728 exp.JSONExtract, 729 this=this, 730 expression=self.dialect.to_json_path(path), 731 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 732 ), 733 TokenType.DARROW: lambda self, this, path: self.expression( 734 exp.JSONExtractScalar, 735 this=this, 736 expression=self.dialect.to_json_path(path), 737 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 738 ), 739 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 740 exp.JSONBExtract, 741 this=this, 742 expression=path, 743 ), 744 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 745 exp.JSONBExtractScalar, 746 this=this, 747 expression=path, 748 ), 749 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 750 exp.JSONBContains, 751 this=this, 752 expression=key, 753 ), 754 } 755 756 EXPRESSION_PARSERS = { 757 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 758 exp.Column: lambda self: self._parse_column(), 759 exp.Condition: lambda self: self._parse_assignment(), 760 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 761 exp.Expression: lambda self: self._parse_expression(), 762 exp.From: lambda self: self._parse_from(joins=True), 763 exp.Group: lambda self: self._parse_group(), 764 exp.Having: lambda self: self._parse_having(), 765 exp.Hint: lambda self: self._parse_hint_body(), 766 exp.Identifier: lambda self: self._parse_id_var(), 767 exp.Join: lambda self: self._parse_join(), 768 exp.Lambda: lambda self: self._parse_lambda(), 769 exp.Lateral: lambda self: self._parse_lateral(), 770 exp.Limit: lambda self: self._parse_limit(), 771 exp.Offset: lambda self: self._parse_offset(), 772 exp.Order: lambda self: self._parse_order(), 773 exp.Ordered: lambda self: self._parse_ordered(), 774 exp.Properties: lambda self: self._parse_properties(), 775 exp.Qualify: lambda self: self._parse_qualify(), 776 exp.Returning: lambda self: self._parse_returning(), 777 exp.Select: lambda self: self._parse_select(), 778 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 779 exp.Table: lambda self: self._parse_table_parts(), 780 exp.TableAlias: lambda self: self._parse_table_alias(), 781 exp.Tuple: lambda self: self._parse_value(), 782 exp.Whens: lambda self: self._parse_when_matched(), 783 exp.Where: lambda self: self._parse_where(), 784 exp.Window: lambda self: self._parse_named_window(), 785 exp.With: lambda self: self._parse_with(), 786 "JOIN_TYPE": lambda self: self._parse_join_parts(), 787 } 788 789 STATEMENT_PARSERS = { 790 TokenType.ALTER: lambda self: self._parse_alter(), 791 TokenType.BEGIN: lambda self: self._parse_transaction(), 792 TokenType.CACHE: lambda self: self._parse_cache(), 793 TokenType.COMMENT: lambda self: self._parse_comment(), 794 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 795 TokenType.COPY: lambda self: self._parse_copy(), 796 TokenType.CREATE: lambda self: self._parse_create(), 797 TokenType.DELETE: lambda self: self._parse_delete(), 798 TokenType.DESC: lambda self: self._parse_describe(), 799 TokenType.DESCRIBE: lambda self: self._parse_describe(), 800 TokenType.DROP: lambda self: self._parse_drop(), 801 TokenType.GRANT: lambda self: self._parse_grant(), 802 TokenType.INSERT: lambda self: self._parse_insert(), 803 TokenType.KILL: 
lambda self: self._parse_kill(), 804 TokenType.LOAD: lambda self: self._parse_load(), 805 TokenType.MERGE: lambda self: self._parse_merge(), 806 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 807 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 808 TokenType.REFRESH: lambda self: self._parse_refresh(), 809 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 810 TokenType.SET: lambda self: self._parse_set(), 811 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 812 TokenType.UNCACHE: lambda self: self._parse_uncache(), 813 TokenType.UPDATE: lambda self: self._parse_update(), 814 TokenType.USE: lambda self: self.expression( 815 exp.Use, 816 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 817 this=self._parse_table(schema=False), 818 ), 819 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 820 } 821 822 UNARY_PARSERS = { 823 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 824 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 825 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 826 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 827 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 828 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 829 } 830 831 STRING_PARSERS = { 832 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 833 exp.RawString, this=token.text 834 ), 835 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 836 exp.National, this=token.text 837 ), 838 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 839 TokenType.STRING: lambda self, token: self.expression( 840 exp.Literal, this=token.text, is_string=True 841 ), 842 TokenType.UNICODE_STRING: lambda self, token: self.expression( 843 exp.UnicodeString, 844 this=token.text, 845 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 846 ), 847 } 848 849 NUMERIC_PARSERS = { 850 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 851 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 852 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 853 TokenType.NUMBER: lambda self, token: self.expression( 854 exp.Literal, this=token.text, is_string=False 855 ), 856 } 857 858 PRIMARY_PARSERS = { 859 **STRING_PARSERS, 860 **NUMERIC_PARSERS, 861 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 862 TokenType.NULL: lambda self, _: self.expression(exp.Null), 863 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 864 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 865 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 866 TokenType.STAR: lambda self, _: self._parse_star_ops(), 867 } 868 869 PLACEHOLDER_PARSERS = { 870 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 871 TokenType.PARAMETER: lambda self: self._parse_parameter(), 872 TokenType.COLON: lambda self: ( 873 self.expression(exp.Placeholder, this=self._prev.text) 874 if self._match_set(self.ID_VAR_TOKENS) 875 else None 876 ), 877 } 878 879 RANGE_PARSERS = { 880 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 881 TokenType.BETWEEN: lambda self, this: 
self._parse_between(this), 882 TokenType.GLOB: binary_range_parser(exp.Glob), 883 TokenType.ILIKE: binary_range_parser(exp.ILike), 884 TokenType.IN: lambda self, this: self._parse_in(this), 885 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 886 TokenType.IS: lambda self, this: self._parse_is(this), 887 TokenType.LIKE: binary_range_parser(exp.Like), 888 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 889 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 890 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 891 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 892 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 893 } 894 895 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 896 "ALLOWED_VALUES": lambda self: self.expression( 897 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 898 ), 899 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 900 "AUTO": lambda self: self._parse_auto_property(), 901 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 902 "BACKUP": lambda self: self.expression( 903 exp.BackupProperty, this=self._parse_var(any_token=True) 904 ), 905 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 906 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 907 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 908 "CHECKSUM": lambda self: self._parse_checksum(), 909 "CLUSTER BY": lambda self: self._parse_cluster(), 910 "CLUSTERED": lambda self: self._parse_clustered_by(), 911 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 912 exp.CollateProperty, **kwargs 913 ), 914 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 915 "CONTAINS": lambda self: self._parse_contains_property(), 916 "COPY": lambda self: self._parse_copy_property(), 917 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 918 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 919 "DEFINER": lambda self: self._parse_definer(), 920 "DETERMINISTIC": lambda self: self.expression( 921 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 922 ), 923 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 924 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 925 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 926 "DISTKEY": lambda self: self._parse_distkey(), 927 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 928 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 929 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 930 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 931 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 932 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 933 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 934 "FREESPACE": lambda self: self._parse_freespace(), 935 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 936 "HEAP": lambda self: self.expression(exp.HeapProperty), 937 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 938 "IMMUTABLE": lambda self: self.expression( 939 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 940 ), 941 "INHERITS": lambda self: self.expression( 942 exp.InheritsProperty, 
expressions=self._parse_wrapped_csv(self._parse_table) 943 ), 944 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 945 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 946 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 947 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 948 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 949 "LIKE": lambda self: self._parse_create_like(), 950 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 951 "LOCK": lambda self: self._parse_locking(), 952 "LOCKING": lambda self: self._parse_locking(), 953 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 954 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 955 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 956 "MODIFIES": lambda self: self._parse_modifies_property(), 957 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 958 "NO": lambda self: self._parse_no_property(), 959 "ON": lambda self: self._parse_on_property(), 960 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 961 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 962 "PARTITION": lambda self: self._parse_partitioned_of(), 963 "PARTITION BY": lambda self: self._parse_partitioned_by(), 964 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 965 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 966 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 967 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 968 "READS": lambda self: self._parse_reads_property(), 969 "REMOTE": lambda self: self._parse_remote_with_connection(), 970 "RETURNS": lambda self: self._parse_returns(), 971 "STRICT": lambda self: self.expression(exp.StrictProperty), 972 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 973 "ROW": lambda self: self._parse_row(), 974 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 975 "SAMPLE": lambda self: self.expression( 976 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 977 ), 978 "SECURE": lambda self: self.expression(exp.SecureProperty), 979 "SECURITY": lambda self: self._parse_security(), 980 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 981 "SETTINGS": lambda self: self._parse_settings_property(), 982 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 983 "SORTKEY": lambda self: self._parse_sortkey(), 984 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 985 "STABLE": lambda self: self.expression( 986 exp.StabilityProperty, this=exp.Literal.string("STABLE") 987 ), 988 "STORED": lambda self: self._parse_stored(), 989 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 990 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 991 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 992 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 993 "TO": lambda self: self._parse_to_table(), 994 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 995 "TRANSFORM": lambda self: self.expression( 996 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 997 ), 998 "TTL": lambda self: self._parse_ttl(), 999 "USING": lambda self: 
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
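
    # PROPERTY_PARSERS maps a property keyword to a callable that parses whatever
    # follows it. Dialects typically extend this table rather than override
    # _parse_property; a minimal sketch (the "FOO" keyword is made up for illustration):
    #
    #     class MyParser(Parser):
    #         PROPERTY_PARSERS = {
    #             **Parser.PROPERTY_PARSERS,
    #             "FOO": lambda self: self.expression(
    #                 exp.Property, this=exp.var("FOO"), value=self._parse_string()
    #             ),
    #         }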

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
    }
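
    # FUNCTION_PARSERS routes function names whose arguments aren't a plain CSV list
    # to dedicated parsers, e.g. CAST(x AS INT) is handled by _parse_cast and yields
    # exp.Cast(this=..., to=exp.DataType.build("INT")) instead of a generic function call.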

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }
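
    # An OPTIONS_TYPE table maps a leading keyword to the token sequences that may
    # follow it (see _parse_var_from_options), so the table above matches phrases
    # such as "ISOLATION LEVEL READ COMMITTED" or "READ ONLY".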

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None
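
    # These class-level switches let dialects tweak parsing behavior declaratively
    # instead of overriding methods; a sketch of a hypothetical dialect parser:
    #
    #     class MyParser(Parser):
    #         LOG_DEFAULTS_TO_LN = True
    #         TABLESAMPLE_CSV = True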
"=") for assignments 1345 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1346 1347 # Whether the TRIM function expects the characters to trim as its first argument 1348 TRIM_PATTERN_FIRST = False 1349 1350 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1351 STRING_ALIASES = False 1352 1353 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1354 MODIFIERS_ATTACHED_TO_SET_OP = True 1355 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1356 1357 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1358 NO_PAREN_IF_COMMANDS = True 1359 1360 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1361 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1362 1363 # Whether the `:` operator is used to extract a value from a VARIANT column 1364 COLON_IS_VARIANT_EXTRACT = False 1365 1366 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1367 # If this is True and '(' is not found, the keyword will be treated as an identifier 1368 VALUES_FOLLOWED_BY_PAREN = True 1369 1370 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1371 SUPPORTS_IMPLICIT_UNNEST = False 1372 1373 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1374 INTERVAL_SPANS = True 1375 1376 # Whether a PARTITION clause can follow a table reference 1377 SUPPORTS_PARTITION_SELECTION = False 1378 1379 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1380 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1381 1382 __slots__ = ( 1383 "error_level", 1384 "error_message_context", 1385 "max_errors", 1386 "dialect", 1387 "sql", 1388 "errors", 1389 "_tokens", 1390 "_index", 1391 "_curr", 1392 "_next", 1393 "_prev", 1394 "_prev_comments", 1395 ) 1396 1397 # Autofilled 1398 SHOW_TRIE: t.Dict = {} 1399 SET_TRIE: t.Dict = {} 1400 1401 def __init__( 1402 self, 1403 error_level: t.Optional[ErrorLevel] = None, 1404 error_message_context: int = 100, 1405 max_errors: int = 3, 1406 dialect: DialectType = None, 1407 ): 1408 from sqlglot.dialects import Dialect 1409 1410 self.error_level = error_level or ErrorLevel.IMMEDIATE 1411 self.error_message_context = error_message_context 1412 self.max_errors = max_errors 1413 self.dialect = Dialect.get_or_raise(dialect) 1414 self.reset() 1415 1416 def reset(self): 1417 self.sql = "" 1418 self.errors = [] 1419 self._tokens = [] 1420 self._index = 0 1421 self._curr = None 1422 self._next = None 1423 self._prev = None 1424 self._prev_comments = None 1425 1426 def parse( 1427 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1428 ) -> t.List[t.Optional[exp.Expression]]: 1429 """ 1430 Parses a list of tokens and returns a list of syntax trees, one tree 1431 per parsed SQL statement. 1432 1433 Args: 1434 raw_tokens: The list of tokens. 1435 sql: The original SQL string, used to produce helpful debug messages. 1436 1437 Returns: 1438 The list of the produced syntax trees. 1439 """ 1440 return self._parse( 1441 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1442 ) 1443 1444 def parse_into( 1445 self, 1446 expression_types: exp.IntoType, 1447 raw_tokens: t.List[Token], 1448 sql: t.Optional[str] = None, 1449 ) -> t.List[t.Optional[exp.Expression]]: 1450 """ 1451 Parses a list of tokens into a given Expression type. 

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
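
    # e.g. parse_into(exp.Select, tokens, sql) only attempts the SELECT parser; the
    # top-level API exposes the same behavior as sqlglot.parse_one(sql, into=exp.Select).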

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
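
    # Statement parsing is table-driven: STATEMENT_PARSERS (defined earlier) maps
    # leading tokens such as CREATE, DROP and INSERT to the _parse_* methods below,
    # and anything unmatched falls back to expression parsing or a raw exp.Command.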

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
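
    # e.g. for "DROP TABLE IF EXISTS t", _parse_exists consumes IF EXISTS and returns
    # True; _parse_exists(not_=True) matches the IF NOT EXISTS form used by CREATE.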

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")
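
    # e.g. "HISTORY_RETENTION_PERIOD = 3 MONTHS" produces exp.var("3 MONTHS"), while a
    # bare "INFINITE" produces exp.var("INFINITE").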

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)
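
    # e.g. Doris/StarRocks-style "DISTRIBUTED BY HASH (k1, k2) BUCKETS 10" becomes
    # exp.DistributedByProperty(kind="HASH", ...), and "DUPLICATE KEY (k1, k2)" is
    # parsed by _parse_composite_key_property into exp.DuplicateKeyProperty.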

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )
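
    # e.g. Hive-style "CLUSTERED BY (c1) SORTED BY (c2) INTO 32 BUCKETS" is consumed
    # above into an exp.ClusteredByProperty with buckets=32.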

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []
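
    # Postgres partition bounds, e.g. "FOR VALUES FROM (MINVALUE) TO (10)" or
    # "FOR VALUES WITH (MODULUS 4, REMAINDER 0)", are handled below.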

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None
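
    # e.g. "ON COMMIT PRESERVE ROWS" / "ON COMMIT DELETE ROWS" on temporary tables
    # map to exp.OnCommitProperty via _parse_on_property above.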

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )
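
    # Multi-table inserts follow Oracle's INSERT ALL / INSERT FIRST form, e.g.:
    #
    #     INSERT FIRST
    #         WHEN c > 10 THEN INTO t1 VALUES (c)
    #         ELSE INTO t2 VALUES (c)
    #     SELECT c FROM src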
expression=self._parse_derived_table_values(), 2680 ), 2681 expression=expression, 2682 else_=else_, 2683 ) 2684 2685 expression = parse_conditional_insert() 2686 while expression is not None: 2687 expressions.append(expression) 2688 expression = parse_conditional_insert() 2689 2690 return self.expression( 2691 exp.MultitableInserts, 2692 kind=kind, 2693 comments=comments, 2694 expressions=expressions, 2695 source=self._parse_table(), 2696 ) 2697 2698 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2699 comments = [] 2700 hint = self._parse_hint() 2701 overwrite = self._match(TokenType.OVERWRITE) 2702 ignore = self._match(TokenType.IGNORE) 2703 local = self._match_text_seq("LOCAL") 2704 alternative = None 2705 is_function = None 2706 2707 if self._match_text_seq("DIRECTORY"): 2708 this: t.Optional[exp.Expression] = self.expression( 2709 exp.Directory, 2710 this=self._parse_var_or_string(), 2711 local=local, 2712 row_format=self._parse_row_format(match_row=True), 2713 ) 2714 else: 2715 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2716 comments += ensure_list(self._prev_comments) 2717 return self._parse_multitable_inserts(comments) 2718 2719 if self._match(TokenType.OR): 2720 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2721 2722 self._match(TokenType.INTO) 2723 comments += ensure_list(self._prev_comments) 2724 self._match(TokenType.TABLE) 2725 is_function = self._match(TokenType.FUNCTION) 2726 2727 this = ( 2728 self._parse_table(schema=True, parse_partition=True) 2729 if not is_function 2730 else self._parse_function() 2731 ) 2732 2733 returning = self._parse_returning() 2734 2735 return self.expression( 2736 exp.Insert, 2737 comments=comments, 2738 hint=hint, 2739 is_function=is_function, 2740 this=this, 2741 stored=self._match_text_seq("STORED") and self._parse_stored(), 2742 by_name=self._match_text_seq("BY", "NAME"), 2743 exists=self._parse_exists(), 2744 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2745 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2746 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2747 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2748 conflict=self._parse_on_conflict(), 2749 returning=returning or self._parse_returning(), 2750 overwrite=overwrite, 2751 alternative=alternative, 2752 ignore=ignore, 2753 source=self._match(TokenType.TABLE) and self._parse_table(), 2754 ) 2755 2756 def _parse_kill(self) -> exp.Kill: 2757 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2758 2759 return self.expression( 2760 exp.Kill, 2761 this=self._parse_primary(), 2762 kind=kind, 2763 ) 2764 2765 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2766 conflict = self._match_text_seq("ON", "CONFLICT") 2767 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2768 2769 if not conflict and not duplicate: 2770 return None 2771 2772 conflict_keys = None 2773 constraint = None 2774 2775 if conflict: 2776 if self._match_text_seq("ON", "CONSTRAINT"): 2777 constraint = self._parse_id_var() 2778 elif self._match(TokenType.L_PAREN): 2779 conflict_keys = self._parse_csv(self._parse_id_var) 2780 self._match_r_paren() 2781 2782 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2783 if self._prev.token_type == TokenType.UPDATE: 2784 self._match(TokenType.SET) 2785 expressions = self._parse_csv(self._parse_equality) 2786 else: 2787 
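# Illustrative note (not part of the original source): for a non-UPDATE conflict
# action, e.g. Postgres'
#   >>> import sqlglot
#   >>> sqlglot.parse_one(
#   ...     "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING", read="postgres"
#   ... )
# there is no SET list to consume, so no assignment expressions are parsed: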
expressions = None 2788 2789 return self.expression( 2790 exp.OnConflict, 2791 duplicate=duplicate, 2792 expressions=expressions, 2793 action=action, 2794 conflict_keys=conflict_keys, 2795 constraint=constraint, 2796 ) 2797 2798 def _parse_returning(self) -> t.Optional[exp.Returning]: 2799 if not self._match(TokenType.RETURNING): 2800 return None 2801 return self.expression( 2802 exp.Returning, 2803 expressions=self._parse_csv(self._parse_expression), 2804 into=self._match(TokenType.INTO) and self._parse_table_part(), 2805 ) 2806 2807 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2808 if not self._match(TokenType.FORMAT): 2809 return None 2810 return self._parse_row_format() 2811 2812 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2813 index = self._index 2814 with_ = with_ or self._match_text_seq("WITH") 2815 2816 if not self._match(TokenType.SERDE_PROPERTIES): 2817 self._retreat(index) 2818 return None 2819 return self.expression( 2820 exp.SerdeProperties, 2821 **{ # type: ignore 2822 "expressions": self._parse_wrapped_properties(), 2823 "with": with_, 2824 }, 2825 ) 2826 2827 def _parse_row_format( 2828 self, match_row: bool = False 2829 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2830 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2831 return None 2832 2833 if self._match_text_seq("SERDE"): 2834 this = self._parse_string() 2835 2836 serde_properties = self._parse_serde_properties() 2837 2838 return self.expression( 2839 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2840 ) 2841 2842 self._match_text_seq("DELIMITED") 2843 2844 kwargs = {} 2845 2846 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2847 kwargs["fields"] = self._parse_string() 2848 if self._match_text_seq("ESCAPED", "BY"): 2849 kwargs["escaped"] = self._parse_string() 2850 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2851 kwargs["collection_items"] = self._parse_string() 2852 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2853 kwargs["map_keys"] = self._parse_string() 2854 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2855 kwargs["lines"] = self._parse_string() 2856 if self._match_text_seq("NULL", "DEFINED", "AS"): 2857 kwargs["null"] = self._parse_string() 2858 2859 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2860 2861 def _parse_load(self) -> exp.LoadData | exp.Command: 2862 if self._match_text_seq("DATA"): 2863 local = self._match_text_seq("LOCAL") 2864 self._match_text_seq("INPATH") 2865 inpath = self._parse_string() 2866 overwrite = self._match(TokenType.OVERWRITE) 2867 self._match_pair(TokenType.INTO, TokenType.TABLE) 2868 2869 return self.expression( 2870 exp.LoadData, 2871 this=self._parse_table(schema=True), 2872 local=local, 2873 overwrite=overwrite, 2874 inpath=inpath, 2875 partition=self._parse_partition(), 2876 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2877 serde=self._match_text_seq("SERDE") and self._parse_string(), 2878 ) 2879 return self._parse_as_command(self._prev) 2880 2881 def _parse_delete(self) -> exp.Delete: 2882 # This handles MySQL's "Multiple-Table Syntax" 2883 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2884 tables = None 2885 if not self._match(TokenType.FROM, advance=False): 2886 tables = self._parse_csv(self._parse_table) or None 2887 2888 returning = self._parse_returning() 2889 2890 
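# Illustrative sketch (not part of the original source): the tables-before-FROM
# branch above is what lets MySQL's multi-table form round-trip, e.g.
#   >>> import sqlglot
#   >>> sqlglot.parse_one("DELETE t1, t2 FROM t1 JOIN t2 ON t1.id = t2.id", read="mysql")
# yields an exp.Delete whose "tables" arg holds t1 and t2, whereas a plain
# "DELETE FROM t WHERE c = 1" leaves tables=None and only fills the args below.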
return self.expression( 2891 exp.Delete, 2892 tables=tables, 2893 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2894 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2895 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2896 where=self._parse_where(), 2897 returning=returning or self._parse_returning(), 2898 limit=self._parse_limit(), 2899 ) 2900 2901 def _parse_update(self) -> exp.Update: 2902 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2903 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2904 returning = self._parse_returning() 2905 return self.expression( 2906 exp.Update, 2907 **{ # type: ignore 2908 "this": this, 2909 "expressions": expressions, 2910 "from": self._parse_from(joins=True), 2911 "where": self._parse_where(), 2912 "returning": returning or self._parse_returning(), 2913 "order": self._parse_order(), 2914 "limit": self._parse_limit(), 2915 }, 2916 ) 2917 2918 def _parse_uncache(self) -> exp.Uncache: 2919 if not self._match(TokenType.TABLE): 2920 self.raise_error("Expecting TABLE after UNCACHE") 2921 2922 return self.expression( 2923 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2924 ) 2925 2926 def _parse_cache(self) -> exp.Cache: 2927 lazy = self._match_text_seq("LAZY") 2928 self._match(TokenType.TABLE) 2929 table = self._parse_table(schema=True) 2930 2931 options = [] 2932 if self._match_text_seq("OPTIONS"): 2933 self._match_l_paren() 2934 k = self._parse_string() 2935 self._match(TokenType.EQ) 2936 v = self._parse_string() 2937 options = [k, v] 2938 self._match_r_paren() 2939 2940 self._match(TokenType.ALIAS) 2941 return self.expression( 2942 exp.Cache, 2943 this=table, 2944 lazy=lazy, 2945 options=options, 2946 expression=self._parse_select(nested=True), 2947 ) 2948 2949 def _parse_partition(self) -> t.Optional[exp.Partition]: 2950 if not self._match(TokenType.PARTITION): 2951 return None 2952 2953 return self.expression( 2954 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2955 ) 2956 2957 def _parse_value(self) -> t.Optional[exp.Tuple]: 2958 def _parse_value_expression() -> t.Optional[exp.Expression]: 2959 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 2960 return exp.var(self._prev.text.upper()) 2961 return self._parse_expression() 2962 2963 if self._match(TokenType.L_PAREN): 2964 expressions = self._parse_csv(_parse_value_expression) 2965 self._match_r_paren() 2966 return self.expression(exp.Tuple, expressions=expressions) 2967 2968 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
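# Illustrative sketch (not part of the original source), assuming Spark-style
# bare scalars:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT * FROM VALUES 1, 2 AS t(c)", read="spark")
# The fallback below wraps each bare scalar in its own single-element exp.Tuple
# row.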
2969 expression = self._parse_expression() 2970 if expression: 2971 return self.expression(exp.Tuple, expressions=[expression]) 2972 return None 2973 2974 def _parse_projections(self) -> t.List[exp.Expression]: 2975 return self._parse_expressions() 2976 2977 def _parse_select( 2978 self, 2979 nested: bool = False, 2980 table: bool = False, 2981 parse_subquery_alias: bool = True, 2982 parse_set_operation: bool = True, 2983 ) -> t.Optional[exp.Expression]: 2984 cte = self._parse_with() 2985 2986 if cte: 2987 this = self._parse_statement() 2988 2989 if not this: 2990 self.raise_error("Failed to parse any statement following CTE") 2991 return cte 2992 2993 if "with" in this.arg_types: 2994 this.set("with", cte) 2995 else: 2996 self.raise_error(f"{this.key} does not support CTE") 2997 this = cte 2998 2999 return this 3000 3001 # duckdb supports leading with FROM x 3002 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3003 3004 if self._match(TokenType.SELECT): 3005 comments = self._prev_comments 3006 3007 hint = self._parse_hint() 3008 3009 if self._next and not self._next.token_type == TokenType.DOT: 3010 all_ = self._match(TokenType.ALL) 3011 distinct = self._match_set(self.DISTINCT_TOKENS) 3012 else: 3013 all_, distinct = None, None 3014 3015 kind = ( 3016 self._match(TokenType.ALIAS) 3017 and self._match_texts(("STRUCT", "VALUE")) 3018 and self._prev.text.upper() 3019 ) 3020 3021 if distinct: 3022 distinct = self.expression( 3023 exp.Distinct, 3024 on=self._parse_value() if self._match(TokenType.ON) else None, 3025 ) 3026 3027 if all_ and distinct: 3028 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3029 3030 operation_modifiers = [] 3031 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3032 operation_modifiers.append(exp.var(self._prev.text.upper())) 3033 3034 limit = self._parse_limit(top=True) 3035 projections = self._parse_projections() 3036 3037 this = self.expression( 3038 exp.Select, 3039 kind=kind, 3040 hint=hint, 3041 distinct=distinct, 3042 expressions=projections, 3043 limit=limit, 3044 operation_modifiers=operation_modifiers or None, 3045 ) 3046 this.comments = comments 3047 3048 into = self._parse_into() 3049 if into: 3050 this.set("into", into) 3051 3052 if not from_: 3053 from_ = self._parse_from() 3054 3055 if from_: 3056 this.set("from", from_) 3057 3058 this = self._parse_query_modifiers(this) 3059 elif (table or nested) and self._match(TokenType.L_PAREN): 3060 if self._match(TokenType.PIVOT): 3061 this = self._parse_simplified_pivot() 3062 elif self._match(TokenType.FROM): 3063 this = exp.select("*").from_( 3064 t.cast(exp.From, self._parse_from(skip_from_token=True)) 3065 ) 3066 else: 3067 this = ( 3068 self._parse_table() 3069 if table 3070 else self._parse_select(nested=True, parse_set_operation=False) 3071 ) 3072 3073 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3074 # in case a modifier (e.g. 
join) is following 3075 if table and isinstance(this, exp.Values) and this.alias: 3076 alias = this.args["alias"].pop() 3077 this = exp.Table(this=this, alias=alias) 3078 3079 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3080 3081 self._match_r_paren() 3082 3083 # We return early here so that the UNION isn't attached to the subquery by the 3084 # following call to _parse_set_operations, but instead becomes the parent node 3085 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3086 elif self._match(TokenType.VALUES, advance=False): 3087 this = self._parse_derived_table_values() 3088 elif from_: 3089 this = exp.select("*").from_(from_.this, copy=False) 3090 elif self._match(TokenType.SUMMARIZE): 3091 table = self._match(TokenType.TABLE) 3092 this = self._parse_select() or self._parse_string() or self._parse_table() 3093 return self.expression(exp.Summarize, this=this, table=table) 3094 elif self._match(TokenType.DESCRIBE): 3095 this = self._parse_describe() 3096 elif self._match_text_seq("STREAM"): 3097 this = self._parse_function() 3098 if this: 3099 this = self.expression(exp.Stream, this=this) 3100 else: 3101 self._retreat(self._index - 1) 3102 else: 3103 this = None 3104 3105 return self._parse_set_operations(this) if parse_set_operation else this 3106 3107 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3108 if not skip_with_token and not self._match(TokenType.WITH): 3109 return None 3110 3111 comments = self._prev_comments 3112 recursive = self._match(TokenType.RECURSIVE) 3113 3114 last_comments = None 3115 expressions = [] 3116 while True: 3117 expressions.append(self._parse_cte()) 3118 if last_comments: 3119 expressions[-1].add_comments(last_comments) 3120 3121 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3122 break 3123 else: 3124 self._match(TokenType.WITH) 3125 3126 last_comments = self._prev_comments 3127 3128 return self.expression( 3129 exp.With, comments=comments, expressions=expressions, recursive=recursive 3130 ) 3131 3132 def _parse_cte(self) -> exp.CTE: 3133 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3134 if not alias or not alias.this: 3135 self.raise_error("Expected CTE to have alias") 3136 3137 self._match(TokenType.ALIAS) 3138 comments = self._prev_comments 3139 3140 if self._match_text_seq("NOT", "MATERIALIZED"): 3141 materialized = False 3142 elif self._match_text_seq("MATERIALIZED"): 3143 materialized = True 3144 else: 3145 materialized = None 3146 3147 return self.expression( 3148 exp.CTE, 3149 this=self._parse_wrapped(self._parse_statement), 3150 alias=alias, 3151 materialized=materialized, 3152 comments=comments, 3153 ) 3154 3155 def _parse_table_alias( 3156 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3157 ) -> t.Optional[exp.TableAlias]: 3158 any_token = self._match(TokenType.ALIAS) 3159 alias = ( 3160 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3161 or self._parse_string_as_identifier() 3162 ) 3163 3164 index = self._index 3165 if self._match(TokenType.L_PAREN): 3166 columns = self._parse_csv(self._parse_function_parameter) 3167 self._match_r_paren() if columns else self._retreat(index) 3168 else: 3169 columns = None 3170 3171 if not alias and not columns: 3172 return None 3173 3174 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3175 3176 # We bubble up comments from the Identifier to the TableAlias 3177 if isinstance(alias, exp.Identifier): 3178 
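# pop_comments detaches the comments from the Identifier, so they are emitted
# once on the TableAlias rather than twice when the tree is generated back to SQL: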
table_alias.add_comments(alias.pop_comments()) 3179 3180 return table_alias 3181 3182 def _parse_subquery( 3183 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3184 ) -> t.Optional[exp.Subquery]: 3185 if not this: 3186 return None 3187 3188 return self.expression( 3189 exp.Subquery, 3190 this=this, 3191 pivots=self._parse_pivots(), 3192 alias=self._parse_table_alias() if parse_alias else None, 3193 sample=self._parse_table_sample(), 3194 ) 3195 3196 def _implicit_unnests_to_explicit(self, this: E) -> E: 3197 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3198 3199 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3200 for i, join in enumerate(this.args.get("joins") or []): 3201 table = join.this 3202 normalized_table = table.copy() 3203 normalized_table.meta["maybe_column"] = True 3204 normalized_table = _norm(normalized_table, dialect=self.dialect) 3205 3206 if isinstance(table, exp.Table) and not join.args.get("on"): 3207 if normalized_table.parts[0].name in refs: 3208 table_as_column = table.to_column() 3209 unnest = exp.Unnest(expressions=[table_as_column]) 3210 3211 # Table.to_column creates a parent Alias node that we want to convert to 3212 # a TableAlias and attach to the Unnest, so it matches the parser's output 3213 if isinstance(table.args.get("alias"), exp.TableAlias): 3214 table_as_column.replace(table_as_column.this) 3215 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3216 3217 table.replace(unnest) 3218 3219 refs.add(normalized_table.alias_or_name) 3220 3221 return this 3222 3223 def _parse_query_modifiers( 3224 self, this: t.Optional[exp.Expression] 3225 ) -> t.Optional[exp.Expression]: 3226 if isinstance(this, (exp.Query, exp.Table)): 3227 for join in self._parse_joins(): 3228 this.append("joins", join) 3229 for lateral in iter(self._parse_lateral, None): 3230 this.append("laterals", lateral) 3231 3232 while True: 3233 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3234 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3235 key, expression = parser(self) 3236 3237 if expression: 3238 this.set(key, expression) 3239 if key == "limit": 3240 offset = expression.args.pop("offset", None) 3241 3242 if offset: 3243 offset = exp.Offset(expression=offset) 3244 this.set("offset", offset) 3245 3246 limit_by_expressions = expression.expressions 3247 expression.set("expressions", None) 3248 offset.set("expressions", limit_by_expressions) 3249 continue 3250 break 3251 3252 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3253 this = self._implicit_unnests_to_explicit(this) 3254 3255 return this 3256 3257 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3258 start = self._curr 3259 while self._curr: 3260 self._advance() 3261 3262 end = self._tokens[self._index - 1] 3263 return exp.Hint(expressions=[self._find_sql(start, end)]) 3264 3265 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3266 return self._parse_function_call() 3267 3268 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3269 start_index = self._index 3270 should_fallback_to_string = False 3271 3272 hints = [] 3273 try: 3274 for hint in iter( 3275 lambda: self._parse_csv( 3276 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3277 ), 3278 [], 3279 ): 3280 hints.extend(hint) 3281 except ParseError: 3282 should_fallback_to_string = True 3283 3284 if should_fallback_to_string or self._curr: 3285 
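# Either the structured parse raised, or tokens were left unconsumed; rewind and
# keep the whole hint body as a single raw string instead: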
self._retreat(start_index) 3286 return self._parse_hint_fallback_to_string() 3287 3288 return self.expression(exp.Hint, expressions=hints) 3289 3290 def _parse_hint(self) -> t.Optional[exp.Hint]: 3291 if self._match(TokenType.HINT) and self._prev_comments: 3292 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3293 3294 return None 3295 3296 def _parse_into(self) -> t.Optional[exp.Into]: 3297 if not self._match(TokenType.INTO): 3298 return None 3299 3300 temp = self._match(TokenType.TEMPORARY) 3301 unlogged = self._match_text_seq("UNLOGGED") 3302 self._match(TokenType.TABLE) 3303 3304 return self.expression( 3305 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3306 ) 3307 3308 def _parse_from( 3309 self, joins: bool = False, skip_from_token: bool = False 3310 ) -> t.Optional[exp.From]: 3311 if not skip_from_token and not self._match(TokenType.FROM): 3312 return None 3313 3314 return self.expression( 3315 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3316 ) 3317 3318 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3319 return self.expression( 3320 exp.MatchRecognizeMeasure, 3321 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3322 this=self._parse_expression(), 3323 ) 3324 3325 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3326 if not self._match(TokenType.MATCH_RECOGNIZE): 3327 return None 3328 3329 self._match_l_paren() 3330 3331 partition = self._parse_partition_by() 3332 order = self._parse_order() 3333 3334 measures = ( 3335 self._parse_csv(self._parse_match_recognize_measure) 3336 if self._match_text_seq("MEASURES") 3337 else None 3338 ) 3339 3340 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3341 rows = exp.var("ONE ROW PER MATCH") 3342 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3343 text = "ALL ROWS PER MATCH" 3344 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3345 text += " SHOW EMPTY MATCHES" 3346 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3347 text += " OMIT EMPTY MATCHES" 3348 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3349 text += " WITH UNMATCHED ROWS" 3350 rows = exp.var(text) 3351 else: 3352 rows = None 3353 3354 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3355 text = "AFTER MATCH SKIP" 3356 if self._match_text_seq("PAST", "LAST", "ROW"): 3357 text += " PAST LAST ROW" 3358 elif self._match_text_seq("TO", "NEXT", "ROW"): 3359 text += " TO NEXT ROW" 3360 elif self._match_text_seq("TO", "FIRST"): 3361 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3362 elif self._match_text_seq("TO", "LAST"): 3363 text += f" TO LAST {self._advance_any().text}" # type: ignore 3364 after = exp.var(text) 3365 else: 3366 after = None 3367 3368 if self._match_text_seq("PATTERN"): 3369 self._match_l_paren() 3370 3371 if not self._curr: 3372 self.raise_error("Expecting )", self._curr) 3373 3374 paren = 1 3375 start = self._curr 3376 3377 while self._curr and paren > 0: 3378 if self._curr.token_type == TokenType.L_PAREN: 3379 paren += 1 3380 if self._curr.token_type == TokenType.R_PAREN: 3381 paren -= 1 3382 3383 end = self._prev 3384 self._advance() 3385 3386 if paren > 0: 3387 self.raise_error("Expecting )", self._curr) 3388 3389 pattern = exp.var(self._find_sql(start, end)) 3390 else: 3391 pattern = None 3392 3393 define = ( 3394 self._parse_csv(self._parse_name_as_expression) 3395 if self._match_text_seq("DEFINE") 3396 else None 3397 ) 3398 3399 
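# Illustrative sketch (not part of the original source) of a query that exercises
# the clauses above, using Snowflake-style syntax:
#   >>> import sqlglot
#   >>> sqlglot.parse_one(
#   ...     """
#   ...     SELECT * FROM t MATCH_RECOGNIZE (
#   ...         PARTITION BY a ORDER BY b
#   ...         MEASURES FINAL COUNT(*) AS cnt
#   ...         ONE ROW PER MATCH
#   ...         AFTER MATCH SKIP PAST LAST ROW
#   ...         PATTERN (x+)
#   ...         DEFINE x AS b > 0
#   ...     )
#   ...     """,
#   ...     read="snowflake",
#   ... )
# Note that the PATTERN body is captured verbatim by the paren-balancing scan
# above (as an exp.Var), not parsed as SQL.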
self._match_r_paren() 3400 3401 return self.expression( 3402 exp.MatchRecognize, 3403 partition_by=partition, 3404 order=order, 3405 measures=measures, 3406 rows=rows, 3407 after=after, 3408 pattern=pattern, 3409 define=define, 3410 alias=self._parse_table_alias(), 3411 ) 3412 3413 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3414 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3415 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3416 cross_apply = False 3417 3418 if cross_apply is not None: 3419 this = self._parse_select(table=True) 3420 view = None 3421 outer = None 3422 elif self._match(TokenType.LATERAL): 3423 this = self._parse_select(table=True) 3424 view = self._match(TokenType.VIEW) 3425 outer = self._match(TokenType.OUTER) 3426 else: 3427 return None 3428 3429 if not this: 3430 this = ( 3431 self._parse_unnest() 3432 or self._parse_function() 3433 or self._parse_id_var(any_token=False) 3434 ) 3435 3436 while self._match(TokenType.DOT): 3437 this = exp.Dot( 3438 this=this, 3439 expression=self._parse_function() or self._parse_id_var(any_token=False), 3440 ) 3441 3442 if view: 3443 table = self._parse_id_var(any_token=False) 3444 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3445 table_alias: t.Optional[exp.TableAlias] = self.expression( 3446 exp.TableAlias, this=table, columns=columns 3447 ) 3448 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3449 # We move the alias from the lateral's child node to the lateral itself 3450 table_alias = this.args["alias"].pop() 3451 else: 3452 table_alias = self._parse_table_alias() 3453 3454 return self.expression( 3455 exp.Lateral, 3456 this=this, 3457 view=view, 3458 outer=outer, 3459 alias=table_alias, 3460 cross_apply=cross_apply, 3461 ) 3462 3463 def _parse_join_parts( 3464 self, 3465 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3466 return ( 3467 self._match_set(self.JOIN_METHODS) and self._prev, 3468 self._match_set(self.JOIN_SIDES) and self._prev, 3469 self._match_set(self.JOIN_KINDS) and self._prev, 3470 ) 3471 3472 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3473 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3474 this = self._parse_column() 3475 if isinstance(this, exp.Column): 3476 return this.this 3477 return this 3478 3479 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3480 3481 def _parse_join( 3482 self, skip_join_token: bool = False, parse_bracket: bool = False 3483 ) -> t.Optional[exp.Join]: 3484 if self._match(TokenType.COMMA): 3485 return self.expression(exp.Join, this=self._parse_table()) 3486 3487 index = self._index 3488 method, side, kind = self._parse_join_parts() 3489 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3490 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3491 3492 if not skip_join_token and not join: 3493 self._retreat(index) 3494 kind = None 3495 method = None 3496 side = None 3497 3498 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3499 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3500 3501 if not skip_join_token and not join and not outer_apply and not cross_apply: 3502 return None 3503 3504 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3505 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3506 kwargs["expressions"] = 
self._parse_csv( 3507 lambda: self._parse_table(parse_bracket=parse_bracket) 3508 ) 3509 3510 if method: 3511 kwargs["method"] = method.text 3512 if side: 3513 kwargs["side"] = side.text 3514 if kind: 3515 kwargs["kind"] = kind.text 3516 if hint: 3517 kwargs["hint"] = hint 3518 3519 if self._match(TokenType.MATCH_CONDITION): 3520 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3521 3522 if self._match(TokenType.ON): 3523 kwargs["on"] = self._parse_assignment() 3524 elif self._match(TokenType.USING): 3525 kwargs["using"] = self._parse_using_identifiers() 3526 elif ( 3527 not (outer_apply or cross_apply) 3528 and not isinstance(kwargs["this"], exp.Unnest) 3529 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3530 ): 3531 index = self._index 3532 joins: t.Optional[list] = list(self._parse_joins()) 3533 3534 if joins and self._match(TokenType.ON): 3535 kwargs["on"] = self._parse_assignment() 3536 elif joins and self._match(TokenType.USING): 3537 kwargs["using"] = self._parse_using_identifiers() 3538 else: 3539 joins = None 3540 self._retreat(index) 3541 3542 kwargs["this"].set("joins", joins if joins else None) 3543 3544 comments = [c for token in (method, side, kind) if token for c in token.comments] 3545 return self.expression(exp.Join, comments=comments, **kwargs) 3546 3547 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3548 this = self._parse_assignment() 3549 3550 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3551 return this 3552 3553 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3554 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3555 3556 return this 3557 3558 def _parse_index_params(self) -> exp.IndexParameters: 3559 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3560 3561 if self._match(TokenType.L_PAREN, advance=False): 3562 columns = self._parse_wrapped_csv(self._parse_with_operator) 3563 else: 3564 columns = None 3565 3566 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3567 partition_by = self._parse_partition_by() 3568 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3569 tablespace = ( 3570 self._parse_var(any_token=True) 3571 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3572 else None 3573 ) 3574 where = self._parse_where() 3575 3576 on = self._parse_field() if self._match(TokenType.ON) else None 3577 3578 return self.expression( 3579 exp.IndexParameters, 3580 using=using, 3581 columns=columns, 3582 include=include, 3583 partition_by=partition_by, 3584 where=where, 3585 with_storage=with_storage, 3586 tablespace=tablespace, 3587 on=on, 3588 ) 3589 3590 def _parse_index( 3591 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3592 ) -> t.Optional[exp.Index]: 3593 if index or anonymous: 3594 unique = None 3595 primary = None 3596 amp = None 3597 3598 self._match(TokenType.ON) 3599 self._match(TokenType.TABLE) # hive 3600 table = self._parse_table_parts(schema=True) 3601 else: 3602 unique = self._match(TokenType.UNIQUE) 3603 primary = self._match_text_seq("PRIMARY") 3604 amp = self._match_text_seq("AMP") 3605 3606 if not self._match(TokenType.INDEX): 3607 return None 3608 3609 index = self._parse_id_var() 3610 table = None 3611 3612 params = self._parse_index_params() 3613 3614 return self.expression( 3615 exp.Index, 3616 this=index, 3617 table=table, 3618 unique=unique, 3619 primary=primary, 3620 amp=amp, 3621 
params=params, 3622 ) 3623 3624 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3625 hints: t.List[exp.Expression] = [] 3626 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3627 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3628 hints.append( 3629 self.expression( 3630 exp.WithTableHint, 3631 expressions=self._parse_csv( 3632 lambda: self._parse_function() or self._parse_var(any_token=True) 3633 ), 3634 ) 3635 ) 3636 self._match_r_paren() 3637 else: 3638 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3639 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3640 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3641 3642 self._match_set((TokenType.INDEX, TokenType.KEY)) 3643 if self._match(TokenType.FOR): 3644 hint.set("target", self._advance_any() and self._prev.text.upper()) 3645 3646 hint.set("expressions", self._parse_wrapped_id_vars()) 3647 hints.append(hint) 3648 3649 return hints or None 3650 3651 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3652 return ( 3653 (not schema and self._parse_function(optional_parens=False)) 3654 or self._parse_id_var(any_token=False) 3655 or self._parse_string_as_identifier() 3656 or self._parse_placeholder() 3657 ) 3658 3659 def _parse_table_parts( 3660 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3661 ) -> exp.Table: 3662 catalog = None 3663 db = None 3664 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3665 3666 while self._match(TokenType.DOT): 3667 if catalog: 3668 # This allows nesting the table in arbitrarily many dot expressions if needed 3669 table = self.expression( 3670 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3671 ) 3672 else: 3673 catalog = db 3674 db = table 3675 # "" used for tsql FROM a..b case 3676 table = self._parse_table_part(schema=schema) or "" 3677 3678 if ( 3679 wildcard 3680 and self._is_connected() 3681 and (isinstance(table, exp.Identifier) or not table) 3682 and self._match(TokenType.STAR) 3683 ): 3684 if isinstance(table, exp.Identifier): 3685 table.args["this"] += "*" 3686 else: 3687 table = exp.Identifier(this="*") 3688 3689 # We bubble up comments from the Identifier to the Table 3690 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3691 3692 if is_db_reference: 3693 catalog = db 3694 db = table 3695 table = None 3696 3697 if not table and not is_db_reference: 3698 self.raise_error(f"Expected table name but got {self._curr}") 3699 if not db and is_db_reference: 3700 self.raise_error(f"Expected database name but got {self._curr}") 3701 3702 table = self.expression( 3703 exp.Table, 3704 comments=comments, 3705 this=table, 3706 db=db, 3707 catalog=catalog, 3708 ) 3709 3710 changes = self._parse_changes() 3711 if changes: 3712 table.set("changes", changes) 3713 3714 at_before = self._parse_historical_data() 3715 if at_before: 3716 table.set("when", at_before) 3717 3718 pivots = self._parse_pivots() 3719 if pivots: 3720 table.set("pivots", pivots) 3721 3722 return table 3723 3724 def _parse_table( 3725 self, 3726 schema: bool = False, 3727 joins: bool = False, 3728 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3729 parse_bracket: bool = False, 3730 is_db_reference: bool = False, 3731 parse_partition: bool = False, 3732 ) -> t.Optional[exp.Expression]: 3733 lateral = self._parse_lateral() 3734 if lateral: 3735 return lateral 3736 3737 unnest = 
self._parse_unnest() 3738 if unnest: 3739 return unnest 3740 3741 values = self._parse_derived_table_values() 3742 if values: 3743 return values 3744 3745 subquery = self._parse_select(table=True) 3746 if subquery: 3747 if not subquery.args.get("pivots"): 3748 subquery.set("pivots", self._parse_pivots()) 3749 return subquery 3750 3751 bracket = parse_bracket and self._parse_bracket(None) 3752 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3753 3754 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3755 self._parse_table 3756 ) 3757 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3758 3759 only = self._match(TokenType.ONLY) 3760 3761 this = t.cast( 3762 exp.Expression, 3763 bracket 3764 or rows_from 3765 or self._parse_bracket( 3766 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3767 ), 3768 ) 3769 3770 if only: 3771 this.set("only", only) 3772 3773 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3774 self._match_text_seq("*") 3775 3776 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3777 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3778 this.set("partition", self._parse_partition()) 3779 3780 if schema: 3781 return self._parse_schema(this=this) 3782 3783 version = self._parse_version() 3784 3785 if version: 3786 this.set("version", version) 3787 3788 if self.dialect.ALIAS_POST_TABLESAMPLE: 3789 this.set("sample", self._parse_table_sample()) 3790 3791 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3792 if alias: 3793 this.set("alias", alias) 3794 3795 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3796 return self.expression( 3797 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3798 ) 3799 3800 this.set("hints", self._parse_table_hints()) 3801 3802 if not this.args.get("pivots"): 3803 this.set("pivots", self._parse_pivots()) 3804 3805 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3806 this.set("sample", self._parse_table_sample()) 3807 3808 if joins: 3809 for join in self._parse_joins(): 3810 this.append("joins", join) 3811 3812 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3813 this.set("ordinality", True) 3814 this.set("alias", self._parse_table_alias()) 3815 3816 return this 3817 3818 def _parse_version(self) -> t.Optional[exp.Version]: 3819 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3820 this = "TIMESTAMP" 3821 elif self._match(TokenType.VERSION_SNAPSHOT): 3822 this = "VERSION" 3823 else: 3824 return None 3825 3826 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3827 kind = self._prev.text.upper() 3828 start = self._parse_bitwise() 3829 self._match_texts(("TO", "AND")) 3830 end = self._parse_bitwise() 3831 expression: t.Optional[exp.Expression] = self.expression( 3832 exp.Tuple, expressions=[start, end] 3833 ) 3834 elif self._match_text_seq("CONTAINED", "IN"): 3835 kind = "CONTAINED IN" 3836 expression = self.expression( 3837 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3838 ) 3839 elif self._match(TokenType.ALL): 3840 kind = "ALL" 3841 expression = None 3842 else: 3843 self._match_text_seq("AS", "OF") 3844 kind = "AS OF" 3845 expression = self._parse_type() 3846 3847 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3848 3849 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3850 # 
https://docs.snowflake.com/en/sql-reference/constructs/at-before 3851 index = self._index 3852 historical_data = None 3853 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3854 this = self._prev.text.upper() 3855 kind = ( 3856 self._match(TokenType.L_PAREN) 3857 and self._match_texts(self.HISTORICAL_DATA_KIND) 3858 and self._prev.text.upper() 3859 ) 3860 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3861 3862 if expression: 3863 self._match_r_paren() 3864 historical_data = self.expression( 3865 exp.HistoricalData, this=this, kind=kind, expression=expression 3866 ) 3867 else: 3868 self._retreat(index) 3869 3870 return historical_data 3871 3872 def _parse_changes(self) -> t.Optional[exp.Changes]: 3873 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3874 return None 3875 3876 information = self._parse_var(any_token=True) 3877 self._match_r_paren() 3878 3879 return self.expression( 3880 exp.Changes, 3881 information=information, 3882 at_before=self._parse_historical_data(), 3883 end=self._parse_historical_data(), 3884 ) 3885 3886 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3887 if not self._match(TokenType.UNNEST): 3888 return None 3889 3890 expressions = self._parse_wrapped_csv(self._parse_equality) 3891 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3892 3893 alias = self._parse_table_alias() if with_alias else None 3894 3895 if alias: 3896 if self.dialect.UNNEST_COLUMN_ONLY: 3897 if alias.args.get("columns"): 3898 self.raise_error("Unexpected extra column alias in unnest.") 3899 3900 alias.set("columns", [alias.this]) 3901 alias.set("this", None) 3902 3903 columns = alias.args.get("columns") or [] 3904 if offset and len(expressions) < len(columns): 3905 offset = columns.pop() 3906 3907 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3908 self._match(TokenType.ALIAS) 3909 offset = self._parse_id_var( 3910 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3911 ) or exp.to_identifier("offset") 3912 3913 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3914 3915 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3916 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3917 if not is_derived and not ( 3918 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3919 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3920 ): 3921 return None 3922 3923 expressions = self._parse_csv(self._parse_value) 3924 alias = self._parse_table_alias() 3925 3926 if is_derived: 3927 self._match_r_paren() 3928 3929 return self.expression( 3930 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3931 ) 3932 3933 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3934 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3935 as_modifier and self._match_text_seq("USING", "SAMPLE") 3936 ): 3937 return None 3938 3939 bucket_numerator = None 3940 bucket_denominator = None 3941 bucket_field = None 3942 percent = None 3943 size = None 3944 seed = None 3945 3946 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3947 matched_l_paren = self._match(TokenType.L_PAREN) 3948 3949 if self.TABLESAMPLE_CSV: 3950 num = None 3951 expressions = self._parse_csv(self._parse_primary) 3952 else: 3953 expressions = None 3954 num = ( 3955 self._parse_factor() 3956 if self._match(TokenType.NUMBER, advance=False) 3957 else self._parse_primary() or 
self._parse_placeholder() ) 3958 3959 3960 if self._match_text_seq("BUCKET"): 3961 bucket_numerator = self._parse_number() 3962 self._match_text_seq("OUT", "OF") 3963 bucket_denominator = self._parse_number() 3964 self._match(TokenType.ON) 3965 bucket_field = self._parse_field() 3966 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3967 percent = num 3968 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3969 size = num 3970 else: 3971 percent = num 3972 3973 if matched_l_paren: 3974 self._match_r_paren() 3975 3976 if self._match(TokenType.L_PAREN): 3977 method = self._parse_var(upper=True) 3978 seed = self._match(TokenType.COMMA) and self._parse_number() 3979 self._match_r_paren() 3980 elif self._match_texts(("SEED", "REPEATABLE")): 3981 seed = self._parse_wrapped(self._parse_number) 3982 3983 if not method and self.DEFAULT_SAMPLING_METHOD: 3984 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3985 3986 return self.expression( 3987 exp.TableSample, 3988 expressions=expressions, 3989 method=method, 3990 bucket_numerator=bucket_numerator, 3991 bucket_denominator=bucket_denominator, 3992 bucket_field=bucket_field, 3993 percent=percent, 3994 size=size, 3995 seed=seed, 3996 ) 3997 3998 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3999 return list(iter(self._parse_pivot, None)) or None 4000 4001 def _parse_joins(self) -> t.Iterator[exp.Join]: 4002 return iter(self._parse_join, None) 4003 4004 # https://duckdb.org/docs/sql/statements/pivot 4005 def _parse_simplified_pivot(self) -> exp.Pivot: 4006 def _parse_on() -> t.Optional[exp.Expression]: 4007 this = self._parse_bitwise() 4008 return self._parse_in(this) if self._match(TokenType.IN) else this 4009 4010 this = self._parse_table() 4011 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4012 using = self._match(TokenType.USING) and self._parse_csv( 4013 lambda: self._parse_alias(self._parse_function()) 4014 ) 4015 group = self._parse_group() 4016 return self.expression( 4017 exp.Pivot, this=this, expressions=expressions, using=using, group=group 4018 ) 4019 4020 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4021 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4022 this = self._parse_select_or_expression() 4023 4024 self._match(TokenType.ALIAS) 4025 alias = self._parse_bitwise() 4026 if alias: 4027 if isinstance(alias, exp.Column) and not alias.db: 4028 alias = alias.this 4029 return self.expression(exp.PivotAlias, this=this, alias=alias) 4030 4031 return this 4032 4033 value = self._parse_column() 4034 4035 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4036 self.raise_error("Expecting IN (") 4037 4038 if self._match(TokenType.ANY): 4039 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4040 else: 4041 exprs = self._parse_csv(_parse_aliased_expression) 4042 4043 self._match_r_paren() 4044 return self.expression(exp.In, this=value, expressions=exprs) 4045 4046 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4047 index = self._index 4048 include_nulls = None 4049 4050 if self._match(TokenType.PIVOT): 4051 unpivot = False 4052 elif self._match(TokenType.UNPIVOT): 4053 unpivot = True 4054 4055 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4056 if self._match_text_seq("INCLUDE", "NULLS"): 4057 include_nulls = True 4058 elif self._match_text_seq("EXCLUDE", "NULLS"): 4059 include_nulls = False 4060 else: 4061 return None 4062 4063 expressions
= [] 4064 4065 if not self._match(TokenType.L_PAREN): 4066 self._retreat(index) 4067 return None 4068 4069 if unpivot: 4070 expressions = self._parse_csv(self._parse_column) 4071 else: 4072 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4073 4074 if not expressions: 4075 self.raise_error("Failed to parse PIVOT's aggregation list") 4076 4077 if not self._match(TokenType.FOR): 4078 self.raise_error("Expecting FOR") 4079 4080 field = self._parse_pivot_in() 4081 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4082 self._parse_bitwise 4083 ) 4084 4085 self._match_r_paren() 4086 4087 pivot = self.expression( 4088 exp.Pivot, 4089 expressions=expressions, 4090 field=field, 4091 unpivot=unpivot, 4092 include_nulls=include_nulls, 4093 default_on_null=default_on_null, 4094 ) 4095 4096 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4097 pivot.set("alias", self._parse_table_alias()) 4098 4099 if not unpivot: 4100 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4101 4102 columns: t.List[exp.Expression] = [] 4103 for fld in pivot.args["field"].expressions: 4104 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4105 for name in names: 4106 if self.PREFIXED_PIVOT_COLUMNS: 4107 name = f"{name}_{field_name}" if name else field_name 4108 else: 4109 name = f"{field_name}_{name}" if name else field_name 4110 4111 columns.append(exp.to_identifier(name)) 4112 4113 pivot.set("columns", columns) 4114 4115 return pivot 4116 4117 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4118 return [agg.alias for agg in aggregations] 4119 4120 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4121 if not skip_where_token and not self._match(TokenType.PREWHERE): 4122 return None 4123 4124 return self.expression( 4125 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4126 ) 4127 4128 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4129 if not skip_where_token and not self._match(TokenType.WHERE): 4130 return None 4131 4132 return self.expression( 4133 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4134 ) 4135 4136 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4137 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4138 return None 4139 4140 elements: t.Dict[str, t.Any] = defaultdict(list) 4141 4142 if self._match(TokenType.ALL): 4143 elements["all"] = True 4144 elif self._match(TokenType.DISTINCT): 4145 elements["all"] = False 4146 4147 while True: 4148 index = self._index 4149 4150 elements["expressions"].extend( 4151 self._parse_csv( 4152 lambda: None 4153 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4154 else self._parse_assignment() 4155 ) 4156 ) 4157 4158 before_with_index = self._index 4159 with_prefix = self._match(TokenType.WITH) 4160 4161 if self._match(TokenType.ROLLUP): 4162 elements["rollup"].append( 4163 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4164 ) 4165 elif self._match(TokenType.CUBE): 4166 elements["cube"].append( 4167 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4168 ) 4169 elif self._match(TokenType.GROUPING_SETS): 4170 elements["grouping_sets"].append( 4171 self.expression( 4172 exp.GroupingSets, 4173 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4174 ) 4175 ) 
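# Illustrative sketch (not part of the original source): the branches above allow
# the different grouping styles to be mixed, e.g.
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT a, b FROM t GROUP BY GROUPING SETS ((a, b), (a))")
# collects an exp.GroupingSets node into elements["grouping_sets"]; the branch
# below covers ClickHouse's GROUP BY ... WITH TOTALS: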
4176 elif self._match_text_seq("TOTALS"): 4177 elements["totals"] = True # type: ignore 4178 4179 if before_with_index <= self._index <= before_with_index + 1: 4180 self._retreat(before_with_index) 4181 break 4182 4183 if index == self._index: 4184 break 4185 4186 return self.expression(exp.Group, **elements) # type: ignore 4187 4188 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4189 return self.expression( 4190 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4191 ) 4192 4193 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4194 if self._match(TokenType.L_PAREN): 4195 grouping_set = self._parse_csv(self._parse_column) 4196 self._match_r_paren() 4197 return self.expression(exp.Tuple, expressions=grouping_set) 4198 4199 return self._parse_column() 4200 4201 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4202 if not skip_having_token and not self._match(TokenType.HAVING): 4203 return None 4204 return self.expression(exp.Having, this=self._parse_assignment()) 4205 4206 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4207 if not self._match(TokenType.QUALIFY): 4208 return None 4209 return self.expression(exp.Qualify, this=self._parse_assignment()) 4210 4211 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4212 if skip_start_token: 4213 start = None 4214 elif self._match(TokenType.START_WITH): 4215 start = self._parse_assignment() 4216 else: 4217 return None 4218 4219 self._match(TokenType.CONNECT_BY) 4220 nocycle = self._match_text_seq("NOCYCLE") 4221 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4222 exp.Prior, this=self._parse_bitwise() 4223 ) 4224 connect = self._parse_assignment() 4225 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4226 4227 if not start and self._match(TokenType.START_WITH): 4228 start = self._parse_assignment() 4229 4230 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4231 4232 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4233 this = self._parse_id_var(any_token=True) 4234 if self._match(TokenType.ALIAS): 4235 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4236 return this 4237 4238 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4239 if self._match_text_seq("INTERPOLATE"): 4240 return self._parse_wrapped_csv(self._parse_name_as_expression) 4241 return None 4242 4243 def _parse_order( 4244 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4245 ) -> t.Optional[exp.Expression]: 4246 siblings = None 4247 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4248 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4249 return this 4250 4251 siblings = True 4252 4253 return self.expression( 4254 exp.Order, 4255 this=this, 4256 expressions=self._parse_csv(self._parse_ordered), 4257 siblings=siblings, 4258 ) 4259 4260 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4261 if not self._match(token): 4262 return None 4263 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4264 4265 def _parse_ordered( 4266 self, parse_method: t.Optional[t.Callable] = None 4267 ) -> t.Optional[exp.Ordered]: 4268 this = parse_method() if parse_method else self._parse_assignment() 4269 if not this: 4270 return None 4271 4272 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4273 this = 
exp.var("ALL") 4274 4275 asc = self._match(TokenType.ASC) 4276 desc = self._match(TokenType.DESC) or (asc and False) 4277 4278 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4279 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4280 4281 nulls_first = is_nulls_first or False 4282 explicitly_null_ordered = is_nulls_first or is_nulls_last 4283 4284 if ( 4285 not explicitly_null_ordered 4286 and ( 4287 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4288 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4289 ) 4290 and self.dialect.NULL_ORDERING != "nulls_are_last" 4291 ): 4292 nulls_first = True 4293 4294 if self._match_text_seq("WITH", "FILL"): 4295 with_fill = self.expression( 4296 exp.WithFill, 4297 **{ # type: ignore 4298 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4299 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4300 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4301 "interpolate": self._parse_interpolate(), 4302 }, 4303 ) 4304 else: 4305 with_fill = None 4306 4307 return self.expression( 4308 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4309 ) 4310 4311 def _parse_limit( 4312 self, 4313 this: t.Optional[exp.Expression] = None, 4314 top: bool = False, 4315 skip_limit_token: bool = False, 4316 ) -> t.Optional[exp.Expression]: 4317 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4318 comments = self._prev_comments 4319 if top: 4320 limit_paren = self._match(TokenType.L_PAREN) 4321 expression = self._parse_term() if limit_paren else self._parse_number() 4322 4323 if limit_paren: 4324 self._match_r_paren() 4325 else: 4326 expression = self._parse_term() 4327 4328 if self._match(TokenType.COMMA): 4329 offset = expression 4330 expression = self._parse_term() 4331 else: 4332 offset = None 4333 4334 limit_exp = self.expression( 4335 exp.Limit, 4336 this=this, 4337 expression=expression, 4338 offset=offset, 4339 comments=comments, 4340 expressions=self._parse_limit_by(), 4341 ) 4342 4343 return limit_exp 4344 4345 if self._match(TokenType.FETCH): 4346 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4347 direction = self._prev.text.upper() if direction else "FIRST" 4348 4349 count = self._parse_field(tokens=self.FETCH_TOKENS) 4350 percent = self._match(TokenType.PERCENT) 4351 4352 self._match_set((TokenType.ROW, TokenType.ROWS)) 4353 4354 only = self._match_text_seq("ONLY") 4355 with_ties = self._match_text_seq("WITH", "TIES") 4356 4357 if only and with_ties: 4358 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4359 4360 return self.expression( 4361 exp.Fetch, 4362 direction=direction, 4363 count=count, 4364 percent=percent, 4365 with_ties=with_ties, 4366 ) 4367 4368 return this 4369 4370 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4371 if not self._match(TokenType.OFFSET): 4372 return this 4373 4374 count = self._parse_term() 4375 self._match_set((TokenType.ROW, TokenType.ROWS)) 4376 4377 return self.expression( 4378 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4379 ) 4380 4381 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4382 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4383 4384 def _parse_locks(self) -> t.List[exp.Lock]: 4385 locks = [] 4386 while True: 4387 if self._match_text_seq("FOR", "UPDATE"): 4388 update = True 4389 elif self._match_text_seq("FOR", "SHARE") 
or self._match_text_seq( 4390 "LOCK", "IN", "SHARE", "MODE" 4391 ): 4392 update = False 4393 else: 4394 break 4395 4396 expressions = None 4397 if self._match_text_seq("OF"): 4398 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4399 4400 wait: t.Optional[bool | exp.Expression] = None 4401 if self._match_text_seq("NOWAIT"): 4402 wait = True 4403 elif self._match_text_seq("WAIT"): 4404 wait = self._parse_primary() 4405 elif self._match_text_seq("SKIP", "LOCKED"): 4406 wait = False 4407 4408 locks.append( 4409 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4410 ) 4411 4412 return locks 4413 4414 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4415 while this and self._match_set(self.SET_OPERATIONS): 4416 token_type = self._prev.token_type 4417 4418 if token_type == TokenType.UNION: 4419 operation: t.Type[exp.SetOperation] = exp.Union 4420 elif token_type == TokenType.EXCEPT: 4421 operation = exp.Except 4422 else: 4423 operation = exp.Intersect 4424 4425 comments = self._prev.comments 4426 4427 if self._match(TokenType.DISTINCT): 4428 distinct: t.Optional[bool] = True 4429 elif self._match(TokenType.ALL): 4430 distinct = False 4431 else: 4432 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4433 if distinct is None: 4434 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4435 4436 by_name = self._match_text_seq("BY", "NAME") 4437 expression = self._parse_select(nested=True, parse_set_operation=False) 4438 4439 this = self.expression( 4440 operation, 4441 comments=comments, 4442 this=this, 4443 distinct=distinct, 4444 by_name=by_name, 4445 expression=expression, 4446 ) 4447 4448 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4449 expression = this.expression 4450 4451 if expression: 4452 for arg in self.SET_OP_MODIFIERS: 4453 expr = expression.args.get(arg) 4454 if expr: 4455 this.set(arg, expr.pop()) 4456 4457 return this 4458 4459 def _parse_expression(self) -> t.Optional[exp.Expression]: 4460 return self._parse_alias(self._parse_assignment()) 4461 4462 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4463 this = self._parse_disjunction() 4464 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4465 # This allows us to parse <non-identifier token> := <expr> 4466 this = exp.column( 4467 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4468 ) 4469 4470 while self._match_set(self.ASSIGNMENT): 4471 if isinstance(this, exp.Column) and len(this.parts) == 1: 4472 this = this.this 4473 4474 this = self.expression( 4475 self.ASSIGNMENT[self._prev.token_type], 4476 this=this, 4477 comments=self._prev_comments, 4478 expression=self._parse_assignment(), 4479 ) 4480 4481 return this 4482 4483 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4484 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4485 4486 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4487 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4488 4489 def _parse_equality(self) -> t.Optional[exp.Expression]: 4490 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4491 4492 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4493 return self._parse_tokens(self._parse_range, self.COMPARISON) 4494 4495 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4496 this = this or self._parse_bitwise() 4497 negate = 
self._match(TokenType.NOT) 4498 4499 if self._match_set(self.RANGE_PARSERS): 4500 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4501 if not expression: 4502 return this 4503 4504 this = expression 4505 elif self._match(TokenType.ISNULL): 4506 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4507 4508 # Postgres supports ISNULL and NOTNULL for conditions. 4509 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4510 if self._match(TokenType.NOTNULL): 4511 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4512 this = self.expression(exp.Not, this=this) 4513 4514 if negate: 4515 this = self._negate_range(this) 4516 4517 if self._match(TokenType.IS): 4518 this = self._parse_is(this) 4519 4520 return this 4521 4522 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4523 if not this: 4524 return this 4525 4526 return self.expression(exp.Not, this=this) 4527 4528 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4529 index = self._index - 1 4530 negate = self._match(TokenType.NOT) 4531 4532 if self._match_text_seq("DISTINCT", "FROM"): 4533 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4534 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4535 4536 if self._match(TokenType.JSON): 4537 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4538 4539 if self._match_text_seq("WITH"): 4540 _with = True 4541 elif self._match_text_seq("WITHOUT"): 4542 _with = False 4543 else: 4544 _with = None 4545 4546 unique = self._match(TokenType.UNIQUE) 4547 self._match_text_seq("KEYS") 4548 expression: t.Optional[exp.Expression] = self.expression( 4549 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4550 ) 4551 else: 4552 expression = self._parse_primary() or self._parse_null() 4553 if not expression: 4554 self._retreat(index) 4555 return None 4556 4557 this = self.expression(exp.Is, this=this, expression=expression) 4558 return self.expression(exp.Not, this=this) if negate else this 4559 4560 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4561 unnest = self._parse_unnest(with_alias=False) 4562 if unnest: 4563 this = self.expression(exp.In, this=this, unnest=unnest) 4564 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4565 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4566 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4567 4568 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4569 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4570 else: 4571 this = self.expression(exp.In, this=this, expressions=expressions) 4572 4573 if matched_l_paren: 4574 self._match_r_paren(this) 4575 elif not self._match(TokenType.R_BRACKET, expression=this): 4576 self.raise_error("Expecting ]") 4577 else: 4578 this = self.expression(exp.In, this=this, field=self._parse_column()) 4579 4580 return this 4581 4582 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4583 low = self._parse_bitwise() 4584 self._match(TokenType.AND) 4585 high = self._parse_bitwise() 4586 return self.expression(exp.Between, this=this, low=low, high=high) 4587 4588 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4589 if not self._match(TokenType.ESCAPE): 4590 return this 4591 return self.expression(exp.Escape, this=this, 
expression=self._parse_string()) 4592 4593 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4594 index = self._index 4595 4596 if not self._match(TokenType.INTERVAL) and match_interval: 4597 return None 4598 4599 if self._match(TokenType.STRING, advance=False): 4600 this = self._parse_primary() 4601 else: 4602 this = self._parse_term() 4603 4604 if not this or ( 4605 isinstance(this, exp.Column) 4606 and not this.table 4607 and not this.this.quoted 4608 and this.name.upper() == "IS" 4609 ): 4610 self._retreat(index) 4611 return None 4612 4613 unit = self._parse_function() or ( 4614 not self._match(TokenType.ALIAS, advance=False) 4615 and self._parse_var(any_token=True, upper=True) 4616 ) 4617 4618 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4619 # each INTERVAL expression into this canonical form so it's easy to transpile 4620 if this and this.is_number: 4621 this = exp.Literal.string(this.to_py()) 4622 elif this and this.is_string: 4623 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4624 if parts and unit: 4625 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4626 unit = None 4627 self._retreat(self._index - 1) 4628 4629 if len(parts) == 1: 4630 this = exp.Literal.string(parts[0][0]) 4631 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4632 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4633 unit = self.expression( 4634 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4635 ) 4636 4637 interval = self.expression(exp.Interval, this=this, unit=unit) 4638 4639 index = self._index 4640 self._match(TokenType.PLUS) 4641 4642 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4643 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4644 return self.expression( 4645 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4646 ) 4647 4648 self._retreat(index) 4649 return interval 4650 4651 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4652 this = self._parse_term() 4653 4654 while True: 4655 if self._match_set(self.BITWISE): 4656 this = self.expression( 4657 self.BITWISE[self._prev.token_type], 4658 this=this, 4659 expression=self._parse_term(), 4660 ) 4661 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4662 this = self.expression( 4663 exp.DPipe, 4664 this=this, 4665 expression=self._parse_term(), 4666 safe=not self.dialect.STRICT_STRING_CONCAT, 4667 ) 4668 elif self._match(TokenType.DQMARK): 4669 this = self.expression( 4670 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4671 ) 4672 elif self._match_pair(TokenType.LT, TokenType.LT): 4673 this = self.expression( 4674 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4675 ) 4676 elif self._match_pair(TokenType.GT, TokenType.GT): 4677 this = self.expression( 4678 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4679 ) 4680 else: 4681 break 4682 4683 return this 4684 4685 def _parse_term(self) -> t.Optional[exp.Expression]: 4686 this = self._parse_factor() 4687 4688 while self._match_set(self.TERM): 4689 klass = self.TERM[self._prev.token_type] 4690 comments = self._prev_comments 4691 expression = self._parse_factor() 4692 4693 this = self.expression(klass, this=this, comments=comments, expression=expression) 4694 4695 if isinstance(this, exp.Collate): 4696 expr = this.expression 4697 4698 # Preserve collations such as 
pg_catalog."default" (Postgres) as columns, otherwise 4699 # fallback to Identifier / Var 4700 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4701 ident = expr.this 4702 if isinstance(ident, exp.Identifier): 4703 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4704 4705 return this 4706 4707 def _parse_factor(self) -> t.Optional[exp.Expression]: 4708 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4709 this = parse_method() 4710 4711 while self._match_set(self.FACTOR): 4712 klass = self.FACTOR[self._prev.token_type] 4713 comments = self._prev_comments 4714 expression = parse_method() 4715 4716 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4717 self._retreat(self._index - 1) 4718 return this 4719 4720 this = self.expression(klass, this=this, comments=comments, expression=expression) 4721 4722 if isinstance(this, exp.Div): 4723 this.args["typed"] = self.dialect.TYPED_DIVISION 4724 this.args["safe"] = self.dialect.SAFE_DIVISION 4725 4726 return this 4727 4728 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4729 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4730 4731 def _parse_unary(self) -> t.Optional[exp.Expression]: 4732 if self._match_set(self.UNARY_PARSERS): 4733 return self.UNARY_PARSERS[self._prev.token_type](self) 4734 return self._parse_at_time_zone(self._parse_type()) 4735 4736 def _parse_type( 4737 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4738 ) -> t.Optional[exp.Expression]: 4739 interval = parse_interval and self._parse_interval() 4740 if interval: 4741 return interval 4742 4743 index = self._index 4744 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4745 4746 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4747 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4748 if isinstance(data_type, exp.Cast): 4749 # This constructor can contain ops directly after it, for instance struct unnesting: 4750 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4751 return self._parse_column_ops(data_type) 4752 4753 if data_type: 4754 index2 = self._index 4755 this = self._parse_primary() 4756 4757 if isinstance(this, exp.Literal): 4758 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4759 if parser: 4760 return parser(self, this, data_type) 4761 4762 return self.expression(exp.Cast, this=this, to=data_type) 4763 4764 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4765 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4766 # 4767 # If the index difference here is greater than 1, that means the parser itself must have 4768 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4769 # 4770 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4771 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4772 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4773 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4774 # 4775 # In these cases, we don't really want to return the converted type, but instead retreat 4776 # and try to parse a Column or Identifier in the section below. 
4777 if data_type.expressions and index2 - index > 1: 4778 self._retreat(index2) 4779 return self._parse_column_ops(data_type) 4780 4781 self._retreat(index) 4782 4783 if fallback_to_identifier: 4784 return self._parse_id_var() 4785 4786 this = self._parse_column() 4787 return this and self._parse_column_ops(this) 4788 4789 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4790 this = self._parse_type() 4791 if not this: 4792 return None 4793 4794 if isinstance(this, exp.Column) and not this.table: 4795 this = exp.var(this.name.upper()) 4796 4797 return self.expression( 4798 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4799 ) 4800 4801 def _parse_types( 4802 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4803 ) -> t.Optional[exp.Expression]: 4804 index = self._index 4805 4806 this: t.Optional[exp.Expression] = None 4807 prefix = self._match_text_seq("SYSUDTLIB", ".") 4808 4809 if not self._match_set(self.TYPE_TOKENS): 4810 identifier = allow_identifiers and self._parse_id_var( 4811 any_token=False, tokens=(TokenType.VAR,) 4812 ) 4813 if isinstance(identifier, exp.Identifier): 4814 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4815 4816 if len(tokens) != 1: 4817 self.raise_error("Unexpected identifier", self._prev) 4818 4819 if tokens[0].token_type in self.TYPE_TOKENS: 4820 self._prev = tokens[0] 4821 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4822 type_name = identifier.name 4823 4824 while self._match(TokenType.DOT): 4825 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4826 4827 this = exp.DataType.build(type_name, udt=True) 4828 else: 4829 self._retreat(self._index - 1) 4830 return None 4831 else: 4832 return None 4833 4834 type_token = self._prev.token_type 4835 4836 if type_token == TokenType.PSEUDO_TYPE: 4837 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4838 4839 if type_token == TokenType.OBJECT_IDENTIFIER: 4840 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4841 4842 # https://materialize.com/docs/sql/types/map/ 4843 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4844 key_type = self._parse_types( 4845 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4846 ) 4847 if not self._match(TokenType.FARROW): 4848 self._retreat(index) 4849 return None 4850 4851 value_type = self._parse_types( 4852 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4853 ) 4854 if not self._match(TokenType.R_BRACKET): 4855 self._retreat(index) 4856 return None 4857 4858 return exp.DataType( 4859 this=exp.DataType.Type.MAP, 4860 expressions=[key_type, value_type], 4861 nested=True, 4862 prefix=prefix, 4863 ) 4864 4865 nested = type_token in self.NESTED_TYPE_TOKENS 4866 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4867 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4868 expressions = None 4869 maybe_func = False 4870 4871 if self._match(TokenType.L_PAREN): 4872 if is_struct: 4873 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4874 elif nested: 4875 expressions = self._parse_csv( 4876 lambda: self._parse_types( 4877 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4878 ) 4879 ) 4880 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4881 this = expressions[0] 4882 this.set("nullable", True) 4883 self._match_r_paren() 4884 return this 4885 elif type_token in self.ENUM_TYPE_TOKENS: 4886 
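# Illustrative sketch (not part of the source): enum members are parsed as plain
# expressions by the csv below, so quoted values survive as string literals, e.g.
# >>> import sqlglot
# >>> sqlglot.parse_one("CREATE TABLE t (c ENUM('a', 'b'))", read="mysql").sql("mysql")
# roughly "CREATE TABLE t (c ENUM('a', 'b'))"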
expressions = self._parse_csv(self._parse_equality) 4887 elif is_aggregate: 4888 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4889 any_token=False, tokens=(TokenType.VAR,) 4890 ) 4891 if not func_or_ident or not self._match(TokenType.COMMA): 4892 return None 4893 expressions = self._parse_csv( 4894 lambda: self._parse_types( 4895 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4896 ) 4897 ) 4898 expressions.insert(0, func_or_ident) 4899 else: 4900 expressions = self._parse_csv(self._parse_type_size) 4901 4902 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4903 if type_token == TokenType.VECTOR and len(expressions) == 2: 4904 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4905 4906 if not expressions or not self._match(TokenType.R_PAREN): 4907 self._retreat(index) 4908 return None 4909 4910 maybe_func = True 4911 4912 values: t.Optional[t.List[exp.Expression]] = None 4913 4914 if nested and self._match(TokenType.LT): 4915 if is_struct: 4916 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4917 else: 4918 expressions = self._parse_csv( 4919 lambda: self._parse_types( 4920 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4921 ) 4922 ) 4923 4924 if not self._match(TokenType.GT): 4925 self.raise_error("Expecting >") 4926 4927 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4928 values = self._parse_csv(self._parse_assignment) 4929 if not values and is_struct: 4930 values = None 4931 self._retreat(self._index - 1) 4932 else: 4933 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4934 4935 if type_token in self.TIMESTAMPS: 4936 if self._match_text_seq("WITH", "TIME", "ZONE"): 4937 maybe_func = False 4938 tz_type = ( 4939 exp.DataType.Type.TIMETZ 4940 if type_token in self.TIMES 4941 else exp.DataType.Type.TIMESTAMPTZ 4942 ) 4943 this = exp.DataType(this=tz_type, expressions=expressions) 4944 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4945 maybe_func = False 4946 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4947 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4948 maybe_func = False 4949 elif type_token == TokenType.INTERVAL: 4950 unit = self._parse_var(upper=True) 4951 if unit: 4952 if self._match_text_seq("TO"): 4953 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4954 4955 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4956 else: 4957 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4958 4959 if maybe_func and check_func: 4960 index2 = self._index 4961 peek = self._parse_string() 4962 4963 if not peek: 4964 self._retreat(index) 4965 return None 4966 4967 self._retreat(index2) 4968 4969 if not this: 4970 if self._match_text_seq("UNSIGNED"): 4971 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4972 if not unsigned_type_token: 4973 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4974 4975 type_token = unsigned_type_token or type_token 4976 4977 this = exp.DataType( 4978 this=exp.DataType.Type[type_token.value], 4979 expressions=expressions, 4980 nested=nested, 4981 prefix=prefix, 4982 ) 4983 4984 # Empty arrays/structs are allowed 4985 if values is not None: 4986 cls = exp.Struct if is_struct else exp.Array 4987 this = exp.cast(cls(expressions=values), this, copy=False) 4988 4989 elif expressions: 4990 this.set("expressions", 
expressions) 4991 4992 # https://materialize.com/docs/sql/types/list/#type-name 4993 while self._match(TokenType.LIST): 4994 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4995 4996 index = self._index 4997 4998 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4999 matched_array = self._match(TokenType.ARRAY) 5000 5001 while self._curr: 5002 datatype_token = self._prev.token_type 5003 matched_l_bracket = self._match(TokenType.L_BRACKET) 5004 if not matched_l_bracket and not matched_array: 5005 break 5006 5007 matched_array = False 5008 values = self._parse_csv(self._parse_assignment) or None 5009 if ( 5010 values 5011 and not schema 5012 and ( 5013 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5014 ) 5015 ): 5016 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5017 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5018 self._retreat(index) 5019 break 5020 5021 this = exp.DataType( 5022 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5023 ) 5024 self._match(TokenType.R_BRACKET) 5025 5026 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5027 converter = self.TYPE_CONVERTERS.get(this.this) 5028 if converter: 5029 this = converter(t.cast(exp.DataType, this)) 5030 5031 return this 5032 5033 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5034 index = self._index 5035 5036 if ( 5037 self._curr 5038 and self._next 5039 and self._curr.token_type in self.TYPE_TOKENS 5040 and self._next.token_type in self.TYPE_TOKENS 5041 ): 5042 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5043 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5044 this = self._parse_id_var() 5045 else: 5046 this = ( 5047 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5048 or self._parse_id_var() 5049 ) 5050 5051 self._match(TokenType.COLON) 5052 5053 if ( 5054 type_required 5055 and not isinstance(this, exp.DataType) 5056 and not self._match_set(self.TYPE_TOKENS, advance=False) 5057 ): 5058 self._retreat(index) 5059 return self._parse_types() 5060 5061 return self._parse_column_def(this) 5062 5063 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5064 if not self._match_text_seq("AT", "TIME", "ZONE"): 5065 return this 5066 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5067 5068 def _parse_column(self) -> t.Optional[exp.Expression]: 5069 this = self._parse_column_reference() 5070 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5071 5072 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5073 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5074 5075 return column 5076 5077 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5078 this = self._parse_field() 5079 if ( 5080 not this 5081 and self._match(TokenType.VALUES, advance=False) 5082 and self.VALUES_FOLLOWED_BY_PAREN 5083 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5084 ): 5085 this = self._parse_id_var() 5086 5087 if isinstance(this, exp.Identifier): 5088 # We bubble up comments from the Identifier to the Column 5089 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5090 5091 return this 5092 5093 def _parse_colon_as_variant_extract( 5094 self, this: t.Optional[exp.Expression] 5095 ) -> t.Optional[exp.Expression]: 5096 casts = [] 5097 json_path = [] 5098 escape = None 5099 5100 while self._match(TokenType.COLON): 5101 start_index = self._index 5102 5103 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5104 path = self._parse_column_ops( 5105 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5106 ) 5107 5108 # The cast :: operator has a lower precedence than the extraction operator :, so 5109 # we rearrange the AST appropriately to avoid casting the JSON path 5110 while isinstance(path, exp.Cast): 5111 casts.append(path.to) 5112 path = path.this 5113 5114 if casts: 5115 dcolon_offset = next( 5116 i 5117 for i, t in enumerate(self._tokens[start_index:]) 5118 if t.token_type == TokenType.DCOLON 5119 ) 5120 end_token = self._tokens[start_index + dcolon_offset - 1] 5121 else: 5122 end_token = self._prev 5123 5124 if path: 5125 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5126 # it'll roundtrip to a string literal in GET_PATH 5127 if isinstance(path, exp.Identifier) and path.quoted: 5128 escape = True 5129 5130 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5131 5132 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5133 # Databricks transforms it back to the colon/dot notation 5134 if json_path: 5135 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5136 5137 if json_path_expr: 5138 json_path_expr.set("escape", escape) 5139 5140 this = self.expression( 5141 exp.JSONExtract, 5142 this=this, 5143 expression=json_path_expr, 5144 variant_extract=True, 5145 ) 5146 5147 while casts: 5148 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5149 5150 return this 5151 5152 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5153 return self._parse_types() 5154 5155 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5156 this = self._parse_bracket(this) 5157 5158 while self._match_set(self.COLUMN_OPERATORS): 5159 op_token = self._prev.token_type 5160 op = self.COLUMN_OPERATORS.get(op_token) 5161 5162 if op_token == TokenType.DCOLON: 5163 field = self._parse_dcolon() 5164 if not field: 5165 self.raise_error("Expected type") 5166 elif op and self._curr: 5167 field = self._parse_column_reference() or self._parse_bracket() 5168 else: 5169 field = self._parse_field(any_token=True, anonymous_func=True) 5170 5171 if isinstance(field, (exp.Func, exp.Window)) and this: 5172 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5173 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5174 this = exp.replace_tree( 5175 this, 5176 lambda n: ( 5177 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5178 if n.table 5179 else n.this 5180 ) 5181 if isinstance(n, exp.Column) 5182 else n, 5183 ) 5184 5185 if op: 5186 this = op(self, this, field) 5187 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5188 this = self.expression( 5189 exp.Column, 5190 comments=this.comments, 5191 this=field, 5192 table=this.this, 5193 db=this.args.get("table"), 5194 catalog=this.args.get("db"), 5195 ) 5196 elif isinstance(field, exp.Window): 5197 # Move the exp.Dot's to the window's function 5198 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5199 field.set("this", window_func) 5200 this = field 5201 else: 5202 this = self.expression(exp.Dot, this=this, expression=field) 5203 5204 if field and field.comments: 5205 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5206 5207 this = self._parse_bracket(this) 5208 5209 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5210 5211 def _parse_primary(self) -> t.Optional[exp.Expression]: 5212 if self._match_set(self.PRIMARY_PARSERS): 5213 token_type = self._prev.token_type 5214 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5215 5216 if token_type == TokenType.STRING: 5217 expressions = [primary] 5218 while self._match(TokenType.STRING): 5219 expressions.append(exp.Literal.string(self._prev.text)) 5220 5221 if len(expressions) > 1: 5222 return self.expression(exp.Concat, expressions=expressions) 5223 5224 return primary 5225 5226 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5227 return exp.Literal.number(f"0.{self._prev.text}") 5228 5229 if 
self._match(TokenType.L_PAREN): 5230 comments = self._prev_comments 5231 query = self._parse_select() 5232 5233 if query: 5234 expressions = [query] 5235 else: 5236 expressions = self._parse_expressions() 5237 5238 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5239 5240 if not this and self._match(TokenType.R_PAREN, advance=False): 5241 this = self.expression(exp.Tuple) 5242 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5243 this = self._parse_subquery(this=this, parse_alias=False) 5244 elif isinstance(this, exp.Subquery): 5245 this = self._parse_subquery( 5246 this=self._parse_set_operations(this), parse_alias=False 5247 ) 5248 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5249 this = self.expression(exp.Tuple, expressions=expressions) 5250 else: 5251 this = self.expression(exp.Paren, this=this) 5252 5253 if this: 5254 this.add_comments(comments) 5255 5256 self._match_r_paren(expression=this) 5257 return this 5258 5259 return None 5260 5261 def _parse_field( 5262 self, 5263 any_token: bool = False, 5264 tokens: t.Optional[t.Collection[TokenType]] = None, 5265 anonymous_func: bool = False, 5266 ) -> t.Optional[exp.Expression]: 5267 if anonymous_func: 5268 field = ( 5269 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5270 or self._parse_primary() 5271 ) 5272 else: 5273 field = self._parse_primary() or self._parse_function( 5274 anonymous=anonymous_func, any_token=any_token 5275 ) 5276 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5277 5278 def _parse_function( 5279 self, 5280 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5281 anonymous: bool = False, 5282 optional_parens: bool = True, 5283 any_token: bool = False, 5284 ) -> t.Optional[exp.Expression]: 5285 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5286 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5287 fn_syntax = False 5288 if ( 5289 self._match(TokenType.L_BRACE, advance=False) 5290 and self._next 5291 and self._next.text.upper() == "FN" 5292 ): 5293 self._advance(2) 5294 fn_syntax = True 5295 5296 func = self._parse_function_call( 5297 functions=functions, 5298 anonymous=anonymous, 5299 optional_parens=optional_parens, 5300 any_token=any_token, 5301 ) 5302 5303 if fn_syntax: 5304 self._match(TokenType.R_BRACE) 5305 5306 return func 5307 5308 def _parse_function_call( 5309 self, 5310 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5311 anonymous: bool = False, 5312 optional_parens: bool = True, 5313 any_token: bool = False, 5314 ) -> t.Optional[exp.Expression]: 5315 if not self._curr: 5316 return None 5317 5318 comments = self._curr.comments 5319 token_type = self._curr.token_type 5320 this = self._curr.text 5321 upper = this.upper() 5322 5323 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5324 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5325 self._advance() 5326 return self._parse_window(parser(self)) 5327 5328 if not self._next or self._next.token_type != TokenType.L_PAREN: 5329 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5330 self._advance() 5331 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5332 5333 return None 5334 5335 if any_token: 5336 if token_type in self.RESERVED_TOKENS: 5337 return None 5338 elif token_type not in self.FUNC_TOKENS: 5339 return None 5340 5341 self._advance(2) 5342 5343 parser = self.FUNCTION_PARSERS.get(upper) 5344 if parser and not anonymous: 5345 this = parser(self) 
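# Illustrative sketch (not part of the source): how the lookup below resolves
# function names, shown via the public API (MY_UDF is a made-up name).
# >>> import sqlglot
# >>> from sqlglot import exp
# >>> sqlglot.parse_one("SELECT COALESCE(a, b)").find(exp.Coalesce) is not None
# True   # known function -> typed node via FUNCTIONS
# >>> sqlglot.parse_one("SELECT MY_UDF(a)").find(exp.Anonymous) is not None
# True   # unknown function -> exp.Anonymous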
5346 else: 5347 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5348 5349 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5350 this = self.expression( 5351 subquery_predicate, comments=comments, this=self._parse_select() 5352 ) 5353 self._match_r_paren() 5354 return this 5355 5356 if functions is None: 5357 functions = self.FUNCTIONS 5358 5359 function = functions.get(upper) 5360 known_function = function and not anonymous 5361 5362 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5363 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5364 5365 if alias and known_function: 5366 args = self._kv_to_prop_eq(args) 5367 5368 if known_function: 5369 func_builder = t.cast(t.Callable, function) 5370 5371 if "dialect" in func_builder.__code__.co_varnames: 5372 func = func_builder(args, dialect=self.dialect) 5373 else: 5374 func = func_builder(args) 5375 5376 func = self.validate_expression(func, args) 5377 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5378 func.meta["name"] = this 5379 5380 this = func 5381 else: 5382 if token_type == TokenType.IDENTIFIER: 5383 this = exp.Identifier(this=this, quoted=True) 5384 this = self.expression(exp.Anonymous, this=this, expressions=args) 5385 5386 if isinstance(this, exp.Expression): 5387 this.add_comments(comments) 5388 5389 self._match_r_paren(this) 5390 return self._parse_window(this) 5391 5392 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5393 return expression 5394 5395 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5396 transformed = [] 5397 5398 for index, e in enumerate(expressions): 5399 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5400 if isinstance(e, exp.Alias): 5401 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5402 5403 if not isinstance(e, exp.PropertyEQ): 5404 e = self.expression( 5405 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5406 ) 5407 5408 if isinstance(e.this, exp.Column): 5409 e.this.replace(e.this.this) 5410 else: 5411 e = self._to_prop_eq(e, index) 5412 5413 transformed.append(e) 5414 5415 return transformed 5416 5417 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5418 return self._parse_statement() 5419 5420 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5421 return self._parse_column_def(self._parse_id_var()) 5422 5423 def _parse_user_defined_function( 5424 self, kind: t.Optional[TokenType] = None 5425 ) -> t.Optional[exp.Expression]: 5426 this = self._parse_id_var() 5427 5428 while self._match(TokenType.DOT): 5429 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5430 5431 if not self._match(TokenType.L_PAREN): 5432 return this 5433 5434 expressions = self._parse_csv(self._parse_function_parameter) 5435 self._match_r_paren() 5436 return self.expression( 5437 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5438 ) 5439 5440 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5441 literal = self._parse_primary() 5442 if literal: 5443 return self.expression(exp.Introducer, this=token.text, expression=literal) 5444 5445 return self.expression(exp.Identifier, this=token.text) 5446 5447 def _parse_session_parameter(self) -> exp.SessionParameter: 5448 kind = None 5449 this = self._parse_id_var() or self._parse_primary() 5450 5451 if this and self._match(TokenType.DOT): 5452 kind = 
this.name 5453 this = self._parse_var() or self._parse_primary() 5454 5455 return self.expression(exp.SessionParameter, this=this, kind=kind) 5456 5457 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5458 return self._parse_id_var() 5459 5460 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5461 index = self._index 5462 5463 if self._match(TokenType.L_PAREN): 5464 expressions = t.cast( 5465 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5466 ) 5467 5468 if not self._match(TokenType.R_PAREN): 5469 self._retreat(index) 5470 else: 5471 expressions = [self._parse_lambda_arg()] 5472 5473 if self._match_set(self.LAMBDAS): 5474 return self.LAMBDAS[self._prev.token_type](self, expressions) 5475 5476 self._retreat(index) 5477 5478 this: t.Optional[exp.Expression] 5479 5480 if self._match(TokenType.DISTINCT): 5481 this = self.expression( 5482 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5483 ) 5484 else: 5485 this = self._parse_select_or_expression(alias=alias) 5486 5487 return self._parse_limit( 5488 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5489 ) 5490 5491 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5492 index = self._index 5493 if not self._match(TokenType.L_PAREN): 5494 return this 5495 5496 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5497 # expr can be of both types 5498 if self._match_set(self.SELECT_START_TOKENS): 5499 self._retreat(index) 5500 return this 5501 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5502 self._match_r_paren() 5503 return self.expression(exp.Schema, this=this, expressions=args) 5504 5505 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5506 return self._parse_column_def(self._parse_field(any_token=True)) 5507 5508 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5509 # column defs are not really columns, they're identifiers 5510 if isinstance(this, exp.Column): 5511 this = this.this 5512 5513 kind = self._parse_types(schema=True) 5514 5515 if self._match_text_seq("FOR", "ORDINALITY"): 5516 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5517 5518 constraints: t.List[exp.Expression] = [] 5519 5520 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5521 ("ALIAS", "MATERIALIZED") 5522 ): 5523 persisted = self._prev.text.upper() == "MATERIALIZED" 5524 constraint_kind = exp.ComputedColumnConstraint( 5525 this=self._parse_assignment(), 5526 persisted=persisted or self._match_text_seq("PERSISTED"), 5527 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5528 ) 5529 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5530 elif ( 5531 kind 5532 and self._match(TokenType.ALIAS, advance=False) 5533 and ( 5534 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5535 or (self._next and self._next.token_type == TokenType.L_PAREN) 5536 ) 5537 ): 5538 self._advance() 5539 constraints.append( 5540 self.expression( 5541 exp.ColumnConstraint, 5542 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5543 ) 5544 ) 5545 5546 while True: 5547 constraint = self._parse_column_constraint() 5548 if not constraint: 5549 break 5550 constraints.append(constraint) 5551 5552 if not kind and not constraints: 5553 return this 5554 5555 return self.expression(exp.ColumnDef, this=this, kind=kind, 
constraints=constraints) 5556 5557 def _parse_auto_increment( 5558 self, 5559 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5560 start = None 5561 increment = None 5562 5563 if self._match(TokenType.L_PAREN, advance=False): 5564 args = self._parse_wrapped_csv(self._parse_bitwise) 5565 start = seq_get(args, 0) 5566 increment = seq_get(args, 1) 5567 elif self._match_text_seq("START"): 5568 start = self._parse_bitwise() 5569 self._match_text_seq("INCREMENT") 5570 increment = self._parse_bitwise() 5571 5572 if start and increment: 5573 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5574 5575 return exp.AutoIncrementColumnConstraint() 5576 5577 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5578 if not self._match_text_seq("REFRESH"): 5579 self._retreat(self._index - 1) 5580 return None 5581 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5582 5583 def _parse_compress(self) -> exp.CompressColumnConstraint: 5584 if self._match(TokenType.L_PAREN, advance=False): 5585 return self.expression( 5586 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5587 ) 5588 5589 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5590 5591 def _parse_generated_as_identity( 5592 self, 5593 ) -> ( 5594 exp.GeneratedAsIdentityColumnConstraint 5595 | exp.ComputedColumnConstraint 5596 | exp.GeneratedAsRowColumnConstraint 5597 ): 5598 if self._match_text_seq("BY", "DEFAULT"): 5599 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5600 this = self.expression( 5601 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5602 ) 5603 else: 5604 self._match_text_seq("ALWAYS") 5605 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5606 5607 self._match(TokenType.ALIAS) 5608 5609 if self._match_text_seq("ROW"): 5610 start = self._match_text_seq("START") 5611 if not start: 5612 self._match(TokenType.END) 5613 hidden = self._match_text_seq("HIDDEN") 5614 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5615 5616 identity = self._match_text_seq("IDENTITY") 5617 5618 if self._match(TokenType.L_PAREN): 5619 if self._match(TokenType.START_WITH): 5620 this.set("start", self._parse_bitwise()) 5621 if self._match_text_seq("INCREMENT", "BY"): 5622 this.set("increment", self._parse_bitwise()) 5623 if self._match_text_seq("MINVALUE"): 5624 this.set("minvalue", self._parse_bitwise()) 5625 if self._match_text_seq("MAXVALUE"): 5626 this.set("maxvalue", self._parse_bitwise()) 5627 5628 if self._match_text_seq("CYCLE"): 5629 this.set("cycle", True) 5630 elif self._match_text_seq("NO", "CYCLE"): 5631 this.set("cycle", False) 5632 5633 if not identity: 5634 this.set("expression", self._parse_range()) 5635 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5636 args = self._parse_csv(self._parse_bitwise) 5637 this.set("start", seq_get(args, 0)) 5638 this.set("increment", seq_get(args, 1)) 5639 5640 self._match_r_paren() 5641 5642 return this 5643 5644 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5645 self._match_text_seq("LENGTH") 5646 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5647 5648 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5649 if self._match_text_seq("NULL"): 5650 return self.expression(exp.NotNullColumnConstraint) 5651 if self._match_text_seq("CASESPECIFIC"): 5652 
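# Illustrative sketch (not part of the source): one of the NOT-prefixed
# constraints handled here, Teradata's NOT CASESPECIFIC (output hedged).
# >>> import sqlglot
# >>> sqlglot.parse_one(
# ...     "CREATE TABLE t (c VARCHAR(10) NOT CASESPECIFIC)", read="teradata"
# ... ).sql("teradata")
# roughly "CREATE TABLE t (c VARCHAR(10) NOT CASESPECIFIC)"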
return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5653 if self._match_text_seq("FOR", "REPLICATION"): 5654 return self.expression(exp.NotForReplicationColumnConstraint) 5655 5656 # Unconsume the `NOT` token 5657 self._retreat(self._index - 1) 5658 return None 5659 5660 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5661 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5662 5663 procedure_option_follows = ( 5664 self._match(TokenType.WITH, advance=False) 5665 and self._next 5666 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5667 ) 5668 5669 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5670 return self.expression( 5671 exp.ColumnConstraint, 5672 this=this, 5673 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5674 ) 5675 5676 return this 5677 5678 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5679 if not self._match(TokenType.CONSTRAINT): 5680 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5681 5682 return self.expression( 5683 exp.Constraint, 5684 this=self._parse_id_var(), 5685 expressions=self._parse_unnamed_constraints(), 5686 ) 5687 5688 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5689 constraints = [] 5690 while True: 5691 constraint = self._parse_unnamed_constraint() or self._parse_function() 5692 if not constraint: 5693 break 5694 constraints.append(constraint) 5695 5696 return constraints 5697 5698 def _parse_unnamed_constraint( 5699 self, constraints: t.Optional[t.Collection[str]] = None 5700 ) -> t.Optional[exp.Expression]: 5701 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5702 constraints or self.CONSTRAINT_PARSERS 5703 ): 5704 return None 5705 5706 constraint = self._prev.text.upper() 5707 if constraint not in self.CONSTRAINT_PARSERS: 5708 self.raise_error(f"No parser found for schema constraint {constraint}.") 5709 5710 return self.CONSTRAINT_PARSERS[constraint](self) 5711 5712 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5713 return self._parse_id_var(any_token=False) 5714 5715 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5716 self._match_text_seq("KEY") 5717 return self.expression( 5718 exp.UniqueColumnConstraint, 5719 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5720 this=self._parse_schema(self._parse_unique_key()), 5721 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5722 on_conflict=self._parse_on_conflict(), 5723 ) 5724 5725 def _parse_key_constraint_options(self) -> t.List[str]: 5726 options = [] 5727 while True: 5728 if not self._curr: 5729 break 5730 5731 if self._match(TokenType.ON): 5732 action = None 5733 on = self._advance_any() and self._prev.text 5734 5735 if self._match_text_seq("NO", "ACTION"): 5736 action = "NO ACTION" 5737 elif self._match_text_seq("CASCADE"): 5738 action = "CASCADE" 5739 elif self._match_text_seq("RESTRICT"): 5740 action = "RESTRICT" 5741 elif self._match_pair(TokenType.SET, TokenType.NULL): 5742 action = "SET NULL" 5743 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5744 action = "SET DEFAULT" 5745 else: 5746 self.raise_error("Invalid key constraint") 5747 5748 options.append(f"ON {on} {action}") 5749 else: 5750 var = self._parse_var_from_options( 5751 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5752 ) 5753 if not var: 5754 break 5755 options.append(var.name) 5756 5757 return options 5758 5759 def _parse_references(self, match: bool = True) -> 
t.Optional[exp.Reference]: 5760 if match and not self._match(TokenType.REFERENCES): 5761 return None 5762 5763 expressions = None 5764 this = self._parse_table(schema=True) 5765 options = self._parse_key_constraint_options() 5766 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5767 5768 def _parse_foreign_key(self) -> exp.ForeignKey: 5769 expressions = self._parse_wrapped_id_vars() 5770 reference = self._parse_references() 5771 options = {} 5772 5773 while self._match(TokenType.ON): 5774 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5775 self.raise_error("Expected DELETE or UPDATE") 5776 5777 kind = self._prev.text.lower() 5778 5779 if self._match_text_seq("NO", "ACTION"): 5780 action = "NO ACTION" 5781 elif self._match(TokenType.SET): 5782 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5783 action = "SET " + self._prev.text.upper() 5784 else: 5785 self._advance() 5786 action = self._prev.text.upper() 5787 5788 options[kind] = action 5789 5790 return self.expression( 5791 exp.ForeignKey, 5792 expressions=expressions, 5793 reference=reference, 5794 **options, # type: ignore 5795 ) 5796 5797 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5798 return self._parse_field() 5799 5800 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5801 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5802 self._retreat(self._index - 1) 5803 return None 5804 5805 id_vars = self._parse_wrapped_id_vars() 5806 return self.expression( 5807 exp.PeriodForSystemTimeConstraint, 5808 this=seq_get(id_vars, 0), 5809 expression=seq_get(id_vars, 1), 5810 ) 5811 5812 def _parse_primary_key( 5813 self, wrapped_optional: bool = False, in_props: bool = False 5814 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5815 desc = ( 5816 self._match_set((TokenType.ASC, TokenType.DESC)) 5817 and self._prev.token_type == TokenType.DESC 5818 ) 5819 5820 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5821 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5822 5823 expressions = self._parse_wrapped_csv( 5824 self._parse_primary_key_part, optional=wrapped_optional 5825 ) 5826 options = self._parse_key_constraint_options() 5827 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5828 5829 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5830 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5831 5832 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5833 """ 5834 Parses a datetime column in ODBC format. We parse the column into the corresponding 5835 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5836 same as we did for `DATE('yyyy-mm-dd')`. 
5837 5838 Reference: 5839 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5840 """ 5841 self._match(TokenType.VAR) 5842 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5843 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5844 if not self._match(TokenType.R_BRACE): 5845 self.raise_error("Expected }") 5846 return expression 5847 5848 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5849 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5850 return this 5851 5852 bracket_kind = self._prev.token_type 5853 if ( 5854 bracket_kind == TokenType.L_BRACE 5855 and self._curr 5856 and self._curr.token_type == TokenType.VAR 5857 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5858 ): 5859 return self._parse_odbc_datetime_literal() 5860 5861 expressions = self._parse_csv( 5862 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5863 ) 5864 5865 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5866 self.raise_error("Expected ]") 5867 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5868 self.raise_error("Expected }") 5869 5870 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5871 if bracket_kind == TokenType.L_BRACE: 5872 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5873 elif not this: 5874 this = build_array_constructor( 5875 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5876 ) 5877 else: 5878 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5879 if constructor_type: 5880 return build_array_constructor( 5881 constructor_type, 5882 args=expressions, 5883 bracket_kind=bracket_kind, 5884 dialect=self.dialect, 5885 ) 5886 5887 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5888 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5889 5890 self._add_comments(this) 5891 return self._parse_bracket(this) 5892 5893 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5894 if self._match(TokenType.COLON): 5895 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5896 return this 5897 5898 def _parse_case(self) -> t.Optional[exp.Expression]: 5899 ifs = [] 5900 default = None 5901 5902 comments = self._prev_comments 5903 expression = self._parse_assignment() 5904 5905 while self._match(TokenType.WHEN): 5906 this = self._parse_assignment() 5907 self._match(TokenType.THEN) 5908 then = self._parse_assignment() 5909 ifs.append(self.expression(exp.If, this=this, true=then)) 5910 5911 if self._match(TokenType.ELSE): 5912 default = self._parse_assignment() 5913 5914 if not self._match(TokenType.END): 5915 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5916 default = exp.column("interval") 5917 else: 5918 self.raise_error("Expected END after CASE", self._prev) 5919 5920 return self.expression( 5921 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5922 ) 5923 5924 def _parse_if(self) -> t.Optional[exp.Expression]: 5925 if self._match(TokenType.L_PAREN): 5926 args = self._parse_csv(self._parse_assignment) 5927 this = self.validate_expression(exp.If.from_arg_list(args), args) 5928 self._match_r_paren() 5929 else: 5930 index = self._index - 1 5931 5932 if self.NO_PAREN_IF_COMMANDS and index == 0: 5933 
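# Illustrative sketch (not part of the source): _parse_case above collects one
# exp.If per WHEN branch plus an optional default.
# >>> import sqlglot
# >>> from sqlglot import exp
# >>> case = sqlglot.parse_one("SELECT CASE WHEN x = 1 THEN 'a' ELSE 'b' END").find(exp.Case)
# >>> len(case.args["ifs"]), case.args["default"].sql()
# (1, "'b'")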
return self._parse_as_command(self._prev) 5934 5935 condition = self._parse_assignment() 5936 5937 if not condition: 5938 self._retreat(index) 5939 return None 5940 5941 self._match(TokenType.THEN) 5942 true = self._parse_assignment() 5943 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5944 self._match(TokenType.END) 5945 this = self.expression(exp.If, this=condition, true=true, false=false) 5946 5947 return this 5948 5949 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5950 if not self._match_text_seq("VALUE", "FOR"): 5951 self._retreat(self._index - 1) 5952 return None 5953 5954 return self.expression( 5955 exp.NextValueFor, 5956 this=self._parse_column(), 5957 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5958 ) 5959 5960 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5961 this = self._parse_function() or self._parse_var_or_string(upper=True) 5962 5963 if self._match(TokenType.FROM): 5964 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5965 5966 if not self._match(TokenType.COMMA): 5967 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5968 5969 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5970 5971 def _parse_gap_fill(self) -> exp.GapFill: 5972 self._match(TokenType.TABLE) 5973 this = self._parse_table() 5974 5975 self._match(TokenType.COMMA) 5976 args = [this, *self._parse_csv(self._parse_lambda)] 5977 5978 gap_fill = exp.GapFill.from_arg_list(args) 5979 return self.validate_expression(gap_fill, args) 5980 5981 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5982 this = self._parse_assignment() 5983 5984 if not self._match(TokenType.ALIAS): 5985 if self._match(TokenType.COMMA): 5986 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5987 5988 self.raise_error("Expected AS after CAST") 5989 5990 fmt = None 5991 to = self._parse_types() 5992 5993 if self._match(TokenType.FORMAT): 5994 fmt_string = self._parse_string() 5995 fmt = self._parse_at_time_zone(fmt_string) 5996 5997 if not to: 5998 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5999 if to.this in exp.DataType.TEMPORAL_TYPES: 6000 this = self.expression( 6001 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6002 this=this, 6003 format=exp.Literal.string( 6004 format_time( 6005 fmt_string.this if fmt_string else "", 6006 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6007 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6008 ) 6009 ), 6010 safe=safe, 6011 ) 6012 6013 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6014 this.set("zone", fmt.args["zone"]) 6015 return this 6016 elif not to: 6017 self.raise_error("Expected TYPE after CAST") 6018 elif isinstance(to, exp.Identifier): 6019 to = exp.DataType.build(to.name, udt=True) 6020 elif to.this == exp.DataType.Type.CHAR: 6021 if self._match(TokenType.CHARACTER_SET): 6022 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6023 6024 return self.expression( 6025 exp.Cast if strict else exp.TryCast, 6026 this=this, 6027 to=to, 6028 format=fmt, 6029 safe=safe, 6030 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6031 ) 6032 6033 def _parse_string_agg(self) -> exp.GroupConcat: 6034 if self._match(TokenType.DISTINCT): 6035 args: t.List[t.Optional[exp.Expression]] = [ 6036 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
6037 ] 6038 if self._match(TokenType.COMMA): 6039 args.extend(self._parse_csv(self._parse_assignment)) 6040 else: 6041 args = self._parse_csv(self._parse_assignment) # type: ignore 6042 6043 if self._match_text_seq("ON", "OVERFLOW"): 6044 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6045 if self._match_text_seq("ERROR"): 6046 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6047 else: 6048 self._match_text_seq("TRUNCATE") 6049 on_overflow = self.expression( 6050 exp.OverflowTruncateBehavior, 6051 this=self._parse_string(), 6052 with_count=( 6053 self._match_text_seq("WITH", "COUNT") 6054 or not self._match_text_seq("WITHOUT", "COUNT") 6055 ), 6056 ) 6057 else: 6058 on_overflow = None 6059 6060 index = self._index 6061 if not self._match(TokenType.R_PAREN) and args: 6062 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6063 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6064 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6065 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6066 6067 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6068 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6069 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6070 if not self._match_text_seq("WITHIN", "GROUP"): 6071 self._retreat(index) 6072 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6073 6074 # The corresponding match_r_paren will be called in parse_function (caller) 6075 self._match_l_paren() 6076 6077 return self.expression( 6078 exp.GroupConcat, 6079 this=self._parse_order(this=seq_get(args, 0)), 6080 separator=seq_get(args, 1), 6081 on_overflow=on_overflow, 6082 ) 6083 6084 def _parse_convert( 6085 self, strict: bool, safe: t.Optional[bool] = None 6086 ) -> t.Optional[exp.Expression]: 6087 this = self._parse_bitwise() 6088 6089 if self._match(TokenType.USING): 6090 to: t.Optional[exp.Expression] = self.expression( 6091 exp.CharacterSet, this=self._parse_var() 6092 ) 6093 elif self._match(TokenType.COMMA): 6094 to = self._parse_types() 6095 else: 6096 to = None 6097 6098 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6099 6100 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6101 """ 6102 There are generally two variants of the DECODE function: 6103 6104 - DECODE(bin, charset) 6105 - DECODE(expression, search, result [, search, result] ... [, default]) 6106 6107 The second variant will always be parsed into a CASE expression. Note that NULL 6108 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6109 instead of relying on pattern matching. 
6110 """ 6111 args = self._parse_csv(self._parse_assignment) 6112 6113 if len(args) < 3: 6114 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6115 6116 expression, *expressions = args 6117 if not expression: 6118 return None 6119 6120 ifs = [] 6121 for search, result in zip(expressions[::2], expressions[1::2]): 6122 if not search or not result: 6123 return None 6124 6125 if isinstance(search, exp.Literal): 6126 ifs.append( 6127 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6128 ) 6129 elif isinstance(search, exp.Null): 6130 ifs.append( 6131 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6132 ) 6133 else: 6134 cond = exp.or_( 6135 exp.EQ(this=expression.copy(), expression=search), 6136 exp.and_( 6137 exp.Is(this=expression.copy(), expression=exp.Null()), 6138 exp.Is(this=search.copy(), expression=exp.Null()), 6139 copy=False, 6140 ), 6141 copy=False, 6142 ) 6143 ifs.append(exp.If(this=cond, true=result)) 6144 6145 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6146 6147 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6148 self._match_text_seq("KEY") 6149 key = self._parse_column() 6150 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6151 self._match_text_seq("VALUE") 6152 value = self._parse_bitwise() 6153 6154 if not key and not value: 6155 return None 6156 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6157 6158 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6159 if not this or not self._match_text_seq("FORMAT", "JSON"): 6160 return this 6161 6162 return self.expression(exp.FormatJson, this=this) 6163 6164 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6165 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6166 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6167 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6168 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6169 else: 6170 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6171 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6172 6173 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6174 6175 if not empty and not error and not null: 6176 return None 6177 6178 return self.expression( 6179 exp.OnCondition, 6180 empty=empty, 6181 error=error, 6182 null=null, 6183 ) 6184 6185 def _parse_on_handling( 6186 self, on: str, *values: str 6187 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6188 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6189 for value in values: 6190 if self._match_text_seq(value, "ON", on): 6191 return f"{value} ON {on}" 6192 6193 index = self._index 6194 if self._match(TokenType.DEFAULT): 6195 default_value = self._parse_bitwise() 6196 if self._match_text_seq("ON", on): 6197 return default_value 6198 6199 self._retreat(index) 6200 6201 return None 6202 6203 @t.overload 6204 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6205 6206 @t.overload 6207 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6208 6209 def _parse_json_object(self, agg=False): 6210 star = self._parse_star() 6211 expressions = ( 6212 [star] 6213 if star 6214 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6215 ) 6216 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6217 6218 unique_keys = None 6219 if self._match_text_seq("WITH", "UNIQUE"): 6220 unique_keys = True 6221 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6222 unique_keys = False 6223 6224 self._match_text_seq("KEYS") 6225 6226 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6227 self._parse_type() 6228 ) 6229 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6230 6231 return self.expression( 6232 exp.JSONObjectAgg if agg else exp.JSONObject, 6233 expressions=expressions, 6234 null_handling=null_handling, 6235 unique_keys=unique_keys, 6236 return_type=return_type, 6237 encoding=encoding, 6238 ) 6239 6240 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6241 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6242 if not self._match_text_seq("NESTED"): 6243 this = self._parse_id_var() 6244 kind = self._parse_types(allow_identifiers=False) 6245 nested = None 6246 else: 6247 this = None 6248 kind = None 6249 nested = True 6250 6251 path = self._match_text_seq("PATH") and self._parse_string() 6252 nested_schema = nested and self._parse_json_schema() 6253 6254 return self.expression( 6255 exp.JSONColumnDef, 6256 this=this, 6257 kind=kind, 6258 path=path, 6259 nested_schema=nested_schema, 6260 ) 6261 6262 def _parse_json_schema(self) -> exp.JSONSchema: 6263 self._match_text_seq("COLUMNS") 6264 return self.expression( 6265 exp.JSONSchema, 6266 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6267 ) 6268 6269 def _parse_json_table(self) -> exp.JSONTable: 6270 this = self._parse_format_json(self._parse_bitwise()) 6271 path = self._match(TokenType.COMMA) and self._parse_string() 6272 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6273 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6274 schema = self._parse_json_schema() 6275 6276 return exp.JSONTable( 6277 this=this, 6278 schema=schema, 6279 path=path, 6280 error_handling=error_handling, 6281 empty_handling=empty_handling, 6282 ) 6283 6284 def _parse_match_against(self) -> exp.MatchAgainst: 6285 expressions = self._parse_csv(self._parse_column) 6286 6287 self._match_text_seq(")", "AGAINST", "(") 6288 6289 this = self._parse_string() 6290 6291 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6292 modifier = "IN NATURAL LANGUAGE MODE" 6293 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6294 modifier = f"{modifier} WITH QUERY EXPANSION" 6295 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6296 modifier = "IN BOOLEAN MODE" 6297 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6298 modifier = "WITH QUERY EXPANSION" 6299 else: 6300 modifier = None 6301 6302 return self.expression( 6303 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6304 ) 6305 6306 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6307 def _parse_open_json(self) -> exp.OpenJSON: 6308 this = self._parse_bitwise() 6309 path = self._match(TokenType.COMMA) and self._parse_string() 6310 6311 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6312 this = self._parse_field(any_token=True) 6313 kind = self._parse_types() 6314 path = 
self._parse_string() 6315 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6316 6317 return self.expression( 6318 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6319 ) 6320 6321 expressions = None 6322 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6323 self._match_l_paren() 6324 expressions = self._parse_csv(_parse_open_json_column_def) 6325 6326 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6327 6328 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6329 args = self._parse_csv(self._parse_bitwise) 6330 6331 if self._match(TokenType.IN): 6332 return self.expression( 6333 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6334 ) 6335 6336 if haystack_first: 6337 haystack = seq_get(args, 0) 6338 needle = seq_get(args, 1) 6339 else: 6340 needle = seq_get(args, 0) 6341 haystack = seq_get(args, 1) 6342 6343 return self.expression( 6344 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6345 ) 6346 6347 def _parse_predict(self) -> exp.Predict: 6348 self._match_text_seq("MODEL") 6349 this = self._parse_table() 6350 6351 self._match(TokenType.COMMA) 6352 self._match_text_seq("TABLE") 6353 6354 return self.expression( 6355 exp.Predict, 6356 this=this, 6357 expression=self._parse_table(), 6358 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6359 ) 6360 6361 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6362 args = self._parse_csv(self._parse_table) 6363 return exp.JoinHint(this=func_name.upper(), expressions=args) 6364 6365 def _parse_substring(self) -> exp.Substring: 6366 # Postgres supports the form: substring(string [from int] [for int]) 6367 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6368 6369 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6370 6371 if self._match(TokenType.FROM): 6372 args.append(self._parse_bitwise()) 6373 if self._match(TokenType.FOR): 6374 if len(args) == 1: 6375 args.append(exp.Literal.number(1)) 6376 args.append(self._parse_bitwise()) 6377 6378 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6379 6380 def _parse_trim(self) -> exp.Trim: 6381 # https://www.w3resource.com/sql/character-functions/trim.php 6382 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6383 6384 position = None 6385 collation = None 6386 expression = None 6387 6388 if self._match_texts(self.TRIM_TYPES): 6389 position = self._prev.text.upper() 6390 6391 this = self._parse_bitwise() 6392 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6393 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6394 expression = self._parse_bitwise() 6395 6396 if invert_order: 6397 this, expression = expression, this 6398 6399 if self._match(TokenType.COLLATE): 6400 collation = self._parse_bitwise() 6401 6402 return self.expression( 6403 exp.Trim, this=this, position=position, expression=expression, collation=collation 6404 ) 6405 6406 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6407 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6408 6409 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6410 return self._parse_window(self._parse_id_var(), alias=True) 6411 6412 def _parse_respect_or_ignore_nulls( 6413 self, this: t.Optional[exp.Expression] 6414 ) -> t.Optional[exp.Expression]: 6415 if self._match_text_seq("IGNORE", "NULLS"): 
6416 return self.expression(exp.IgnoreNulls, this=this) 6417 if self._match_text_seq("RESPECT", "NULLS"): 6418 return self.expression(exp.RespectNulls, this=this) 6419 return this 6420 6421 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6422 if self._match(TokenType.HAVING): 6423 self._match_texts(("MAX", "MIN")) 6424 max = self._prev.text.upper() != "MIN" 6425 return self.expression( 6426 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6427 ) 6428 6429 return this 6430 6431 def _parse_window( 6432 self, this: t.Optional[exp.Expression], alias: bool = False 6433 ) -> t.Optional[exp.Expression]: 6434 func = this 6435 comments = func.comments if isinstance(func, exp.Expression) else None 6436 6437 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6438 self._match(TokenType.WHERE) 6439 this = self.expression( 6440 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6441 ) 6442 self._match_r_paren() 6443 6444 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6445 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6446 if self._match_text_seq("WITHIN", "GROUP"): 6447 order = self._parse_wrapped(self._parse_order) 6448 this = self.expression(exp.WithinGroup, this=this, expression=order) 6449 6450 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER; 6451 # some dialects choose to implement it and some do not. 6452 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6453 6454 # There is some code above in _parse_lambda that handles 6455 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6456 6457 # The code below handles 6458 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6459 6460 # Oracle allows both formats 6461 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6462 # and Snowflake chose to do the same for familiarity 6463 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6464 if isinstance(this, exp.AggFunc): 6465 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6466 6467 if ignore_respect and ignore_respect is not this: 6468 ignore_respect.replace(ignore_respect.this) 6469 this = self.expression(ignore_respect.__class__, this=this) 6470 6471 this = self._parse_respect_or_ignore_nulls(this) 6472 6473 # BigQuery: SELECT ... FROM ... WINDOW x AS (PARTITION BY ...)
6474 if alias: 6475 over = None 6476 self._match(TokenType.ALIAS) 6477 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6478 return this 6479 else: 6480 over = self._prev.text.upper() 6481 6482 if comments and isinstance(func, exp.Expression): 6483 func.pop_comments() 6484 6485 if not self._match(TokenType.L_PAREN): 6486 return self.expression( 6487 exp.Window, 6488 comments=comments, 6489 this=this, 6490 alias=self._parse_id_var(False), 6491 over=over, 6492 ) 6493 6494 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6495 6496 first = self._match(TokenType.FIRST) 6497 if self._match_text_seq("LAST"): 6498 first = False 6499 6500 partition, order = self._parse_partition_and_order() 6501 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6502 6503 if kind: 6504 self._match(TokenType.BETWEEN) 6505 start = self._parse_window_spec() 6506 self._match(TokenType.AND) 6507 end = self._parse_window_spec() 6508 6509 spec = self.expression( 6510 exp.WindowSpec, 6511 kind=kind, 6512 start=start["value"], 6513 start_side=start["side"], 6514 end=end["value"], 6515 end_side=end["side"], 6516 ) 6517 else: 6518 spec = None 6519 6520 self._match_r_paren() 6521 6522 window = self.expression( 6523 exp.Window, 6524 comments=comments, 6525 this=this, 6526 partition_by=partition, 6527 order=order, 6528 spec=spec, 6529 alias=window_alias, 6530 over=over, 6531 first=first, 6532 ) 6533 6534 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6535 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6536 return self._parse_window(window, alias=alias) 6537 6538 return window 6539 6540 def _parse_partition_and_order( 6541 self, 6542 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6543 return self._parse_partition_by(), self._parse_order() 6544 6545 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6546 self._match(TokenType.BETWEEN) 6547 6548 return { 6549 "value": ( 6550 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6551 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6552 or self._parse_bitwise() 6553 ), 6554 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6555 } 6556 6557 def _parse_alias( 6558 self, this: t.Optional[exp.Expression], explicit: bool = False 6559 ) -> t.Optional[exp.Expression]: 6560 any_token = self._match(TokenType.ALIAS) 6561 comments = self._prev_comments or [] 6562 6563 if explicit and not any_token: 6564 return this 6565 6566 if self._match(TokenType.L_PAREN): 6567 aliases = self.expression( 6568 exp.Aliases, 6569 comments=comments, 6570 this=this, 6571 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6572 ) 6573 self._match_r_paren(aliases) 6574 return aliases 6575 6576 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6577 self.STRING_ALIASES and self._parse_string_as_identifier() 6578 ) 6579 6580 if alias: 6581 comments.extend(alias.pop_comments()) 6582 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6583 column = this.this 6584 6585 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6586 if not this.comments and column and column.comments: 6587 this.comments = column.pop_comments() 6588 6589 return this 6590 6591 def _parse_id_var( 6592 self, 6593 any_token: bool = True, 6594 tokens: t.Optional[t.Collection[TokenType]] = None, 6595 ) -> t.Optional[exp.Expression]: 6596 expression = self._parse_identifier() 6597 if 
not expression and ( 6598 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6599 ): 6600 quoted = self._prev.token_type == TokenType.STRING 6601 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6602 6603 return expression 6604 6605 def _parse_string(self) -> t.Optional[exp.Expression]: 6606 if self._match_set(self.STRING_PARSERS): 6607 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6608 return self._parse_placeholder() 6609 6610 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6611 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6612 6613 def _parse_number(self) -> t.Optional[exp.Expression]: 6614 if self._match_set(self.NUMERIC_PARSERS): 6615 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6616 return self._parse_placeholder() 6617 6618 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6619 if self._match(TokenType.IDENTIFIER): 6620 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6621 return self._parse_placeholder() 6622 6623 def _parse_var( 6624 self, 6625 any_token: bool = False, 6626 tokens: t.Optional[t.Collection[TokenType]] = None, 6627 upper: bool = False, 6628 ) -> t.Optional[exp.Expression]: 6629 if ( 6630 (any_token and self._advance_any()) 6631 or self._match(TokenType.VAR) 6632 or (self._match_set(tokens) if tokens else False) 6633 ): 6634 return self.expression( 6635 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6636 ) 6637 return self._parse_placeholder() 6638 6639 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6640 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6641 self._advance() 6642 return self._prev 6643 return None 6644 6645 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6646 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6647 6648 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6649 return self._parse_primary() or self._parse_var(any_token=True) 6650 6651 def _parse_null(self) -> t.Optional[exp.Expression]: 6652 if self._match_set(self.NULL_TOKENS): 6653 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6654 return self._parse_placeholder() 6655 6656 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6657 if self._match(TokenType.TRUE): 6658 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6659 if self._match(TokenType.FALSE): 6660 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6661 return self._parse_placeholder() 6662 6663 def _parse_star(self) -> t.Optional[exp.Expression]: 6664 if self._match(TokenType.STAR): 6665 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6666 return self._parse_placeholder() 6667 6668 def _parse_parameter(self) -> exp.Parameter: 6669 this = self._parse_identifier() or self._parse_primary_or_var() 6670 return self.expression(exp.Parameter, this=this) 6671 6672 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6673 if self._match_set(self.PLACEHOLDER_PARSERS): 6674 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6675 if placeholder: 6676 return placeholder 6677 self._advance(-1) 6678 return None 6679 6680 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6681 if not self._match_texts(keywords): 6682 return None 6683 if self._match(TokenType.L_PAREN, 
advance=False): 6684 return self._parse_wrapped_csv(self._parse_expression) 6685 6686 expression = self._parse_expression() 6687 return [expression] if expression else None 6688 6689 def _parse_csv( 6690 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6691 ) -> t.List[exp.Expression]: 6692 parse_result = parse_method() 6693 items = [parse_result] if parse_result is not None else [] 6694 6695 while self._match(sep): 6696 self._add_comments(parse_result) 6697 parse_result = parse_method() 6698 if parse_result is not None: 6699 items.append(parse_result) 6700 6701 return items 6702 6703 def _parse_tokens( 6704 self, parse_method: t.Callable, expressions: t.Dict 6705 ) -> t.Optional[exp.Expression]: 6706 this = parse_method() 6707 6708 while self._match_set(expressions): 6709 this = self.expression( 6710 expressions[self._prev.token_type], 6711 this=this, 6712 comments=self._prev_comments, 6713 expression=parse_method(), 6714 ) 6715 6716 return this 6717 6718 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6719 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6720 6721 def _parse_wrapped_csv( 6722 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6723 ) -> t.List[exp.Expression]: 6724 return self._parse_wrapped( 6725 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6726 ) 6727 6728 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6729 wrapped = self._match(TokenType.L_PAREN) 6730 if not wrapped and not optional: 6731 self.raise_error("Expecting (") 6732 parse_result = parse_method() 6733 if wrapped: 6734 self._match_r_paren() 6735 return parse_result 6736 6737 def _parse_expressions(self) -> t.List[exp.Expression]: 6738 return self._parse_csv(self._parse_expression) 6739 6740 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6741 return self._parse_select() or self._parse_set_operations( 6742 self._parse_alias(self._parse_assignment(), explicit=True) 6743 if alias 6744 else self._parse_assignment() 6745 ) 6746 6747 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6748 return self._parse_query_modifiers( 6749 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6750 ) 6751 6752 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6753 this = None 6754 if self._match_texts(self.TRANSACTION_KIND): 6755 this = self._prev.text 6756 6757 self._match_texts(("TRANSACTION", "WORK")) 6758 6759 modes = [] 6760 while True: 6761 mode = [] 6762 while self._match(TokenType.VAR): 6763 mode.append(self._prev.text) 6764 6765 if mode: 6766 modes.append(" ".join(mode)) 6767 if not self._match(TokenType.COMMA): 6768 break 6769 6770 return self.expression(exp.Transaction, this=this, modes=modes) 6771 6772 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6773 chain = None 6774 savepoint = None 6775 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6776 6777 self._match_texts(("TRANSACTION", "WORK")) 6778 6779 if self._match_text_seq("TO"): 6780 self._match_text_seq("SAVEPOINT") 6781 savepoint = self._parse_id_var() 6782 6783 if self._match(TokenType.AND): 6784 chain = not self._match_text_seq("NO") 6785 self._match_text_seq("CHAIN") 6786 6787 if is_rollback: 6788 return self.expression(exp.Rollback, savepoint=savepoint) 6789 6790 return self.expression(exp.Commit, chain=chain) 6791 6792 def _parse_refresh(self) -> exp.Refresh: 6793 
self._match(TokenType.TABLE) 6794 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6795 6796 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6797 if not self._match_text_seq("ADD"): 6798 return None 6799 6800 self._match(TokenType.COLUMN) 6801 exists_column = self._parse_exists(not_=True) 6802 expression = self._parse_field_def() 6803 6804 if expression: 6805 expression.set("exists", exists_column) 6806 6807 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6808 if self._match_texts(("FIRST", "AFTER")): 6809 position = self._prev.text 6810 column_position = self.expression( 6811 exp.ColumnPosition, this=self._parse_column(), position=position 6812 ) 6813 expression.set("position", column_position) 6814 6815 return expression 6816 6817 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6818 drop = self._match(TokenType.DROP) and self._parse_drop() 6819 if drop and not isinstance(drop, exp.Command): 6820 drop.set("kind", drop.args.get("kind", "COLUMN")) 6821 return drop 6822 6823 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6824 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6825 return self.expression( 6826 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6827 ) 6828 6829 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6830 index = self._index - 1 6831 6832 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6833 return self._parse_csv( 6834 lambda: self.expression( 6835 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6836 ) 6837 ) 6838 6839 self._retreat(index) 6840 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6841 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6842 6843 if self._match_text_seq("ADD", "COLUMNS"): 6844 schema = self._parse_schema() 6845 if schema: 6846 return [schema] 6847 return [] 6848 6849 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6850 6851 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6852 if self._match_texts(self.ALTER_ALTER_PARSERS): 6853 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6854 6855 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6856 # keyword after ALTER we default to parsing this statement 6857 self._match(TokenType.COLUMN) 6858 column = self._parse_field(any_token=True) 6859 6860 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6861 return self.expression(exp.AlterColumn, this=column, drop=True) 6862 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6863 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6864 if self._match(TokenType.COMMENT): 6865 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6866 if self._match_text_seq("DROP", "NOT", "NULL"): 6867 return self.expression( 6868 exp.AlterColumn, 6869 this=column, 6870 drop=True, 6871 allow_null=True, 6872 ) 6873 if self._match_text_seq("SET", "NOT", "NULL"): 6874 return self.expression( 6875 exp.AlterColumn, 6876 this=column, 6877 allow_null=False, 6878 ) 6879 self._match_text_seq("SET", "DATA") 6880 self._match_text_seq("TYPE") 6881 return self.expression( 6882 exp.AlterColumn, 6883 this=column, 6884 dtype=self._parse_types(), 6885 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6886 
using=self._match(TokenType.USING) and self._parse_assignment(), 6887 ) 6888 6889 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6890 if self._match_texts(("ALL", "EVEN", "AUTO")): 6891 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6892 6893 self._match_text_seq("KEY", "DISTKEY") 6894 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6895 6896 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6897 if compound: 6898 self._match_text_seq("SORTKEY") 6899 6900 if self._match(TokenType.L_PAREN, advance=False): 6901 return self.expression( 6902 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6903 ) 6904 6905 self._match_texts(("AUTO", "NONE")) 6906 return self.expression( 6907 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6908 ) 6909 6910 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6911 index = self._index - 1 6912 6913 partition_exists = self._parse_exists() 6914 if self._match(TokenType.PARTITION, advance=False): 6915 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6916 6917 self._retreat(index) 6918 return self._parse_csv(self._parse_drop_column) 6919 6920 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6921 if self._match(TokenType.COLUMN): 6922 exists = self._parse_exists() 6923 old_column = self._parse_column() 6924 to = self._match_text_seq("TO") 6925 new_column = self._parse_column() 6926 6927 if old_column is None or to is None or new_column is None: 6928 return None 6929 6930 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6931 6932 self._match_text_seq("TO") 6933 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6934 6935 def _parse_alter_table_set(self) -> exp.AlterSet: 6936 alter_set = self.expression(exp.AlterSet) 6937 6938 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6939 "TABLE", "PROPERTIES" 6940 ): 6941 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6942 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6943 alter_set.set("expressions", [self._parse_assignment()]) 6944 elif self._match_texts(("LOGGED", "UNLOGGED")): 6945 alter_set.set("option", exp.var(self._prev.text.upper())) 6946 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6947 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6948 elif self._match_text_seq("LOCATION"): 6949 alter_set.set("location", self._parse_field()) 6950 elif self._match_text_seq("ACCESS", "METHOD"): 6951 alter_set.set("access_method", self._parse_field()) 6952 elif self._match_text_seq("TABLESPACE"): 6953 alter_set.set("tablespace", self._parse_field()) 6954 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6955 alter_set.set("file_format", [self._parse_field()]) 6956 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6957 alter_set.set("file_format", self._parse_wrapped_options()) 6958 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6959 alter_set.set("copy_options", self._parse_wrapped_options()) 6960 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6961 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6962 else: 6963 if self._match_text_seq("SERDE"): 6964 alter_set.set("serde", self._parse_field()) 6965 6966 alter_set.set("expressions", 
[self._parse_properties()]) 6967 6968 return alter_set 6969 6970 def _parse_alter(self) -> exp.Alter | exp.Command: 6971 start = self._prev 6972 6973 alter_token = self._match_set(self.ALTERABLES) and self._prev 6974 if not alter_token: 6975 return self._parse_as_command(start) 6976 6977 exists = self._parse_exists() 6978 only = self._match_text_seq("ONLY") 6979 this = self._parse_table(schema=True) 6980 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6981 6982 if self._next: 6983 self._advance() 6984 6985 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6986 if parser: 6987 actions = ensure_list(parser(self)) 6988 not_valid = self._match_text_seq("NOT", "VALID") 6989 options = self._parse_csv(self._parse_property) 6990 6991 if not self._curr and actions: 6992 return self.expression( 6993 exp.Alter, 6994 this=this, 6995 kind=alter_token.text.upper(), 6996 exists=exists, 6997 actions=actions, 6998 only=only, 6999 options=options, 7000 cluster=cluster, 7001 not_valid=not_valid, 7002 ) 7003 7004 return self._parse_as_command(start) 7005 7006 def _parse_merge(self) -> exp.Merge: 7007 self._match(TokenType.INTO) 7008 target = self._parse_table() 7009 7010 if target and self._match(TokenType.ALIAS, advance=False): 7011 target.set("alias", self._parse_table_alias()) 7012 7013 self._match(TokenType.USING) 7014 using = self._parse_table() 7015 7016 self._match(TokenType.ON) 7017 on = self._parse_assignment() 7018 7019 return self.expression( 7020 exp.Merge, 7021 this=target, 7022 using=using, 7023 on=on, 7024 whens=self._parse_when_matched(), 7025 returning=self._parse_returning(), 7026 ) 7027 7028 def _parse_when_matched(self) -> exp.Whens: 7029 whens = [] 7030 7031 while self._match(TokenType.WHEN): 7032 matched = not self._match(TokenType.NOT) 7033 self._match_text_seq("MATCHED") 7034 source = ( 7035 False 7036 if self._match_text_seq("BY", "TARGET") 7037 else self._match_text_seq("BY", "SOURCE") 7038 ) 7039 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7040 7041 self._match(TokenType.THEN) 7042 7043 if self._match(TokenType.INSERT): 7044 this = self._parse_star() 7045 if this: 7046 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7047 else: 7048 then = self.expression( 7049 exp.Insert, 7050 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7051 expression=self._match_text_seq("VALUES") and self._parse_value(), 7052 ) 7053 elif self._match(TokenType.UPDATE): 7054 expressions = self._parse_star() 7055 if expressions: 7056 then = self.expression(exp.Update, expressions=expressions) 7057 else: 7058 then = self.expression( 7059 exp.Update, 7060 expressions=self._match(TokenType.SET) 7061 and self._parse_csv(self._parse_equality), 7062 ) 7063 elif self._match(TokenType.DELETE): 7064 then = self.expression(exp.Var, this=self._prev.text) 7065 else: 7066 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7067 7068 whens.append( 7069 self.expression( 7070 exp.When, 7071 matched=matched, 7072 source=source, 7073 condition=condition, 7074 then=then, 7075 ) 7076 ) 7077 return self.expression(exp.Whens, expressions=whens) 7078 7079 def _parse_show(self) -> t.Optional[exp.Expression]: 7080 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7081 if parser: 7082 return parser(self) 7083 return self._parse_as_command(self._prev) 7084 7085 def _parse_set_item_assignment( 7086 self, kind: t.Optional[str] = None 7087 ) -> t.Optional[exp.Expression]: 7088 
index = self._index 7089 7090 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7091 return self._parse_set_transaction(global_=kind == "GLOBAL") 7092 7093 left = self._parse_primary() or self._parse_column() 7094 assignment_delimiter = self._match_texts(("=", "TO")) 7095 7096 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7097 self._retreat(index) 7098 return None 7099 7100 right = self._parse_statement() or self._parse_id_var() 7101 if isinstance(right, (exp.Column, exp.Identifier)): 7102 right = exp.var(right.name) 7103 7104 this = self.expression(exp.EQ, this=left, expression=right) 7105 return self.expression(exp.SetItem, this=this, kind=kind) 7106 7107 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7108 self._match_text_seq("TRANSACTION") 7109 characteristics = self._parse_csv( 7110 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7111 ) 7112 return self.expression( 7113 exp.SetItem, 7114 expressions=characteristics, 7115 kind="TRANSACTION", 7116 **{"global": global_}, # type: ignore 7117 ) 7118 7119 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7120 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7121 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7122 7123 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7124 index = self._index 7125 set_ = self.expression( 7126 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7127 ) 7128 7129 if self._curr: 7130 self._retreat(index) 7131 return self._parse_as_command(self._prev) 7132 7133 return set_ 7134 7135 def _parse_var_from_options( 7136 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7137 ) -> t.Optional[exp.Var]: 7138 start = self._curr 7139 if not start: 7140 return None 7141 7142 option = start.text.upper() 7143 continuations = options.get(option) 7144 7145 index = self._index 7146 self._advance() 7147 for keywords in continuations or []: 7148 if isinstance(keywords, str): 7149 keywords = (keywords,) 7150 7151 if self._match_text_seq(*keywords): 7152 option = f"{option} {' '.join(keywords)}" 7153 break 7154 else: 7155 if continuations or continuations is None: 7156 if raise_unmatched: 7157 self.raise_error(f"Unknown option {option}") 7158 7159 self._retreat(index) 7160 return None 7161 7162 return exp.var(option) 7163 7164 def _parse_as_command(self, start: Token) -> exp.Command: 7165 while self._curr: 7166 self._advance() 7167 text = self._find_sql(start, self._prev) 7168 size = len(start.text) 7169 self._warn_unsupported() 7170 return exp.Command(this=text[:size], expression=text[size:]) 7171 7172 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7173 settings = [] 7174 7175 self._match_l_paren() 7176 kind = self._parse_id_var() 7177 7178 if self._match(TokenType.L_PAREN): 7179 while True: 7180 key = self._parse_id_var() 7181 value = self._parse_primary() 7182 if not key and value is None: 7183 break 7184 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7185 self._match(TokenType.R_PAREN) 7186 7187 self._match_r_paren() 7188 7189 return self.expression( 7190 exp.DictProperty, 7191 this=this, 7192 kind=kind.this if kind else None, 7193 settings=settings, 7194 ) 7195 7196 def _parse_dict_range(self, this: str) -> exp.DictRange: 7197 self._match_l_paren() 7198 has_min = self._match_text_seq("MIN") 7199 if has_min: 7200 min = self._parse_var() or 
self._parse_primary() 7201 self._match_text_seq("MAX") 7202 max = self._parse_var() or self._parse_primary() 7203 else: 7204 max = self._parse_var() or self._parse_primary() 7205 min = exp.Literal.number(0) 7206 self._match_r_paren() 7207 return self.expression(exp.DictRange, this=this, min=min, max=max) 7208 7209 def _parse_comprehension( 7210 self, this: t.Optional[exp.Expression] 7211 ) -> t.Optional[exp.Comprehension]: 7212 index = self._index 7213 expression = self._parse_column() 7214 if not self._match(TokenType.IN): 7215 self._retreat(index - 1) 7216 return None 7217 iterator = self._parse_column() 7218 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7219 return self.expression( 7220 exp.Comprehension, 7221 this=this, 7222 expression=expression, 7223 iterator=iterator, 7224 condition=condition, 7225 ) 7226 7227 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7228 if self._match(TokenType.HEREDOC_STRING): 7229 return self.expression(exp.Heredoc, this=self._prev.text) 7230 7231 if not self._match_text_seq("$"): 7232 return None 7233 7234 tags = ["$"] 7235 tag_text = None 7236 7237 if self._is_connected(): 7238 self._advance() 7239 tags.append(self._prev.text.upper()) 7240 else: 7241 self.raise_error("No closing $ found") 7242 7243 if tags[-1] != "$": 7244 if self._is_connected() and self._match_text_seq("$"): 7245 tag_text = tags[-1] 7246 tags.append("$") 7247 else: 7248 self.raise_error("No closing $ found") 7249 7250 heredoc_start = self._curr 7251 7252 while self._curr: 7253 if self._match_text_seq(*tags, advance=False): 7254 this = self._find_sql(heredoc_start, self._prev) 7255 self._advance(len(tags)) 7256 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7257 7258 self._advance() 7259 7260 self.raise_error(f"No closing {''.join(tags)} found") 7261 return None 7262 7263 def _find_parser( 7264 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7265 ) -> t.Optional[t.Callable]: 7266 if not self._curr: 7267 return None 7268 7269 index = self._index 7270 this = [] 7271 while True: 7272 # The current token might be multiple words 7273 curr = self._curr.text.upper() 7274 key = curr.split(" ") 7275 this.append(curr) 7276 7277 self._advance() 7278 result, trie = in_trie(trie, key) 7279 if result == TrieResult.FAILED: 7280 break 7281 7282 if result == TrieResult.EXISTS: 7283 subparser = parsers[" ".join(this)] 7284 return subparser 7285 7286 self._retreat(index) 7287 return None 7288 7289 def _match(self, token_type, advance=True, expression=None): 7290 if not self._curr: 7291 return None 7292 7293 if self._curr.token_type == token_type: 7294 if advance: 7295 self._advance() 7296 self._add_comments(expression) 7297 return True 7298 7299 return None 7300 7301 def _match_set(self, types, advance=True): 7302 if not self._curr: 7303 return None 7304 7305 if self._curr.token_type in types: 7306 if advance: 7307 self._advance() 7308 return True 7309 7310 return None 7311 7312 def _match_pair(self, token_type_a, token_type_b, advance=True): 7313 if not self._curr or not self._next: 7314 return None 7315 7316 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7317 if advance: 7318 self._advance(2) 7319 return True 7320 7321 return None 7322 7323 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7324 if not self._match(TokenType.L_PAREN, expression=expression): 7325 self.raise_error("Expecting (") 7326 7327 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 
7328 if not self._match(TokenType.R_PAREN, expression=expression): 7329 self.raise_error("Expecting )") 7330 7331 def _match_texts(self, texts, advance=True): 7332 if ( 7333 self._curr 7334 and self._curr.token_type != TokenType.STRING 7335 and self._curr.text.upper() in texts 7336 ): 7337 if advance: 7338 self._advance() 7339 return True 7340 return None 7341 7342 def _match_text_seq(self, *texts, advance=True): 7343 index = self._index 7344 for text in texts: 7345 if ( 7346 self._curr 7347 and self._curr.token_type != TokenType.STRING 7348 and self._curr.text.upper() == text 7349 ): 7350 self._advance() 7351 else: 7352 self._retreat(index) 7353 return None 7354 7355 if not advance: 7356 self._retreat(index) 7357 7358 return True 7359 7360 def _replace_lambda( 7361 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7362 ) -> t.Optional[exp.Expression]: 7363 if not node: 7364 return node 7365 7366 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7367 7368 for column in node.find_all(exp.Column): 7369 typ = lambda_types.get(column.parts[0].name) 7370 if typ is not None: 7371 dot_or_id = column.to_dot() if column.table else column.this 7372 7373 if typ: 7374 dot_or_id = self.expression( 7375 exp.Cast, 7376 this=dot_or_id, 7377 to=typ, 7378 ) 7379 7380 parent = column.parent 7381 7382 while isinstance(parent, exp.Dot): 7383 if not isinstance(parent.parent, exp.Dot): 7384 parent.replace(dot_or_id) 7385 break 7386 parent = parent.parent 7387 else: 7388 if column is node: 7389 node = dot_or_id 7390 else: 7391 column.replace(dot_or_id) 7392 return node 7393 7394 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7395 start = self._prev 7396 7397 # Not to be confused with TRUNCATE(number, decimals) function call 7398 if self._match(TokenType.L_PAREN): 7399 self._retreat(self._index - 2) 7400 return self._parse_function() 7401 7402 # Clickhouse supports TRUNCATE DATABASE as well 7403 is_database = self._match(TokenType.DATABASE) 7404 7405 self._match(TokenType.TABLE) 7406 7407 exists = self._parse_exists(not_=False) 7408 7409 expressions = self._parse_csv( 7410 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7411 ) 7412 7413 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7414 7415 if self._match_text_seq("RESTART", "IDENTITY"): 7416 identity = "RESTART" 7417 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7418 identity = "CONTINUE" 7419 else: 7420 identity = None 7421 7422 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7423 option = self._prev.text 7424 else: 7425 option = None 7426 7427 partition = self._parse_partition() 7428 7429 # Fallback case 7430 if self._curr: 7431 return self._parse_as_command(start) 7432 7433 return self.expression( 7434 exp.TruncateTable, 7435 expressions=expressions, 7436 is_database=is_database, 7437 exists=exists, 7438 cluster=cluster, 7439 identity=identity, 7440 option=option, 7441 partition=partition, 7442 ) 7443 7444 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7445 this = self._parse_ordered(self._parse_opclass) 7446 7447 if not self._match(TokenType.WITH): 7448 return this 7449 7450 op = self._parse_var(any_token=True) 7451 7452 return self.expression(exp.WithOperator, this=this, op=op) 7453 7454 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7455 self._match(TokenType.EQ) 7456 self._match(TokenType.L_PAREN) 7457 7458 opts: t.List[t.Optional[exp.Expression]] = [] 7459 
while self._curr and not self._match(TokenType.R_PAREN): 7460 if self._match_text_seq("FORMAT_NAME", "="): 7461 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7462 # so we parse it separately to use _parse_field() 7463 prop = self.expression( 7464 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7465 ) 7466 opts.append(prop) 7467 else: 7468 opts.append(self._parse_property()) 7469 7470 self._match(TokenType.COMMA) 7471 7472 return opts 7473 7474 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7475 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7476 7477 options = [] 7478 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7479 option = self._parse_var(any_token=True) 7480 prev = self._prev.text.upper() 7481 7482 # Different dialects might separate options and values by white space, "=" and "AS" 7483 self._match(TokenType.EQ) 7484 self._match(TokenType.ALIAS) 7485 7486 param = self.expression(exp.CopyParameter, this=option) 7487 7488 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7489 TokenType.L_PAREN, advance=False 7490 ): 7491 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7492 param.set("expressions", self._parse_wrapped_options()) 7493 elif prev == "FILE_FORMAT": 7494 # T-SQL's external file format case 7495 param.set("expression", self._parse_field()) 7496 else: 7497 param.set("expression", self._parse_unquoted_field()) 7498 7499 options.append(param) 7500 self._match(sep) 7501 7502 return options 7503 7504 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7505 expr = self.expression(exp.Credentials) 7506 7507 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7508 expr.set("storage", self._parse_field()) 7509 if self._match_text_seq("CREDENTIALS"): 7510 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7511 creds = ( 7512 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7513 ) 7514 expr.set("credentials", creds) 7515 if self._match_text_seq("ENCRYPTION"): 7516 expr.set("encryption", self._parse_wrapped_options()) 7517 if self._match_text_seq("IAM_ROLE"): 7518 expr.set("iam_role", self._parse_field()) 7519 if self._match_text_seq("REGION"): 7520 expr.set("region", self._parse_field()) 7521 7522 return expr 7523 7524 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7525 return self._parse_field() 7526 7527 def _parse_copy(self) -> exp.Copy | exp.Command: 7528 start = self._prev 7529 7530 self._match(TokenType.INTO) 7531 7532 this = ( 7533 self._parse_select(nested=True, parse_subquery_alias=False) 7534 if self._match(TokenType.L_PAREN, advance=False) 7535 else self._parse_table(schema=True) 7536 ) 7537 7538 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7539 7540 files = self._parse_csv(self._parse_file_location) 7541 credentials = self._parse_credentials() 7542 7543 self._match_text_seq("WITH") 7544 7545 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7546 7547 # Fallback case 7548 if self._curr: 7549 return self._parse_as_command(start) 7550 7551 return self.expression( 7552 exp.Copy, 7553 this=this, 7554 kind=kind, 7555 credentials=credentials, 7556 files=files, 7557 params=params, 7558 ) 7559 7560 def _parse_normalize(self) -> exp.Normalize: 7561 return self.expression( 7562 exp.Normalize, 7563 this=self._parse_bitwise(), 7564 form=self._match(TokenType.COMMA) and self._parse_var(), 7565 ) 7566 7567 def _parse_star_ops(self) 
-> t.Optional[exp.Expression]: 7568 if self._match_text_seq("COLUMNS", "(", advance=False): 7569 this = self._parse_function() 7570 if isinstance(this, exp.Columns): 7571 this.set("unpack", True) 7572 return this 7573 7574 return self.expression( 7575 exp.Star, 7576 **{ # type: ignore 7577 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7578 "replace": self._parse_star_op("REPLACE"), 7579 "rename": self._parse_star_op("RENAME"), 7580 }, 7581 ) 7582 7583 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7584 privilege_parts = [] 7585 7586 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7587 # (end of privilege list) or L_PAREN (start of column list) are met 7588 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7589 privilege_parts.append(self._curr.text.upper()) 7590 self._advance() 7591 7592 this = exp.var(" ".join(privilege_parts)) 7593 expressions = ( 7594 self._parse_wrapped_csv(self._parse_column) 7595 if self._match(TokenType.L_PAREN, advance=False) 7596 else None 7597 ) 7598 7599 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7600 7601 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7602 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7603 principal = self._parse_id_var() 7604 7605 if not principal: 7606 return None 7607 7608 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7609 7610 def _parse_grant(self) -> exp.Grant | exp.Command: 7611 start = self._prev 7612 7613 privileges = self._parse_csv(self._parse_grant_privilege) 7614 7615 self._match(TokenType.ON) 7616 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7617 7618 # Attempt to parse the securable e.g. MySQL allows names 7619 # such as "foo.*", "*.*" which are not easily parseable yet 7620 securable = self._try_parse(self._parse_table_parts) 7621 7622 if not securable or not self._match_text_seq("TO"): 7623 return self._parse_as_command(start) 7624 7625 principals = self._parse_csv(self._parse_grant_principal) 7626 7627 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7628 7629 if self._curr: 7630 return self._parse_as_command(start) 7631 7632 return self.expression( 7633 exp.Grant, 7634 privileges=privileges, 7635 kind=kind, 7636 securable=securable, 7637 principals=principals, 7638 grant_option=grant_option, 7639 ) 7640 7641 def _parse_overlay(self) -> exp.Overlay: 7642 return self.expression( 7643 exp.Overlay, 7644 **{ # type: ignore 7645 "this": self._parse_bitwise(), 7646 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7647 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7648 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7649 }, 7650 )
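The function-specific parsers above (DECODE, STRING_AGG/GROUP_CONCAT/LISTAGG, JSON_OBJECT, window specifications, and so on) are easiest to observe through sqlglot's public entry points. A minimal sketch, assuming the stock Oracle, Postgres and MySQL dialects; the exact SQL text emitted may differ between sqlglot versions:

import sqlglot
from sqlglot import exp

# Per _parse_decode's docstring, the multi-argument DECODE variant is always
# lowered to a CASE expression, with an explicit IS NULL check for NULL keys.
tree = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', NULL, 'none', 'other')", read="oracle")
assert tree.find(exp.Case) is not None

# STRING_AGG / GROUP_CONCAT / LISTAGG all parse into exp.GroupConcat (see the
# STRING_AGG handling above), which makes transpiling between them mechanical.
print(sqlglot.transpile("SELECT STRING_AGG(x, ',' ORDER BY x) FROM t", read="postgres", write="mysql")[0])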
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1401 def __init__( 1402 self, 1403 error_level: t.Optional[ErrorLevel] = None, 1404 error_message_context: int = 100, 1405 max_errors: int = 3, 1406 dialect: DialectType = None, 1407 ): 1408 from sqlglot.dialects import Dialect 1409 1410 self.error_level = error_level or ErrorLevel.IMMEDIATE 1411 self.error_message_context = error_message_context 1412 self.max_errors = max_errors 1413 self.dialect = Dialect.get_or_raise(dialect) 1414 self.reset()
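For callers who want to drive the parser directly instead of going through sqlglot.parse or sqlglot.parse_one, the constructor composes with the dialect's tokenizer in the obvious way. A minimal sketch; the "duckdb" dialect name is only an example:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")

# Dialect.get_or_raise resolved the dialect above, so its tokenizer can
# produce the token stream the parser expects.
sql = "SELECT a FROM t"
expressions = parser.parse(parser.dialect.tokenize(sql), sql)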
1426 def parse( 1427 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1428 ) -> t.List[t.Optional[exp.Expression]]: 1429 """ 1430 Parses a list of tokens and returns a list of syntax trees, one tree 1431 per parsed SQL statement. 1432 1433 Args: 1434 raw_tokens: The list of tokens. 1435 sql: The original SQL string, used to produce helpful debug messages. 1436 1437 Returns: 1438 The list of the produced syntax trees. 1439 """ 1440 return self._parse( 1441 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1442 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
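Because parsing is statement-oriented, multi-statement input yields one syntax tree per statement. A small sketch:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql)
assert len(trees) == 2  # one tree per statement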
1444 def parse_into( 1445 self, 1446 expression_types: exp.IntoType, 1447 raw_tokens: t.List[Token], 1448 sql: t.Optional[str] = None, 1449 ) -> t.List[t.Optional[exp.Expression]]: 1450 """ 1451 Parses a list of tokens into a given Expression type. If a collection of Expression 1452 types is given instead, this method will try to parse the token list into each one 1453 of them, stopping at the first for which the parsing succeeds. 1454 1455 Args: 1456 expression_types: The expression type(s) to try and parse the token list into. 1457 raw_tokens: The list of tokens. 1458 sql: The original SQL string, used to produce helpful debug messages. 1459 1460 Returns: 1461 The target Expression. 1462 """ 1463 errors = [] 1464 for expression_type in ensure_list(expression_types): 1465 parser = self.EXPRESSION_PARSERS.get(expression_type) 1466 if not parser: 1467 raise TypeError(f"No parser registered for {expression_type}") 1468 1469 try: 1470 return self._parse(parser, raw_tokens, sql) 1471 except ParseError as e: 1472 e.errors[0]["into_expression"] = expression_type 1473 errors.append(e) 1474 1475 raise ParseError( 1476 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1477 errors=merge_errors(errors), 1478 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
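The same machinery backs the `into` argument of sqlglot.parse_one, which is the more common way to request a specific target type. A sketch using exp.Condition, one of the registered into-types (it is what helpers such as exp.condition parse into); if no candidate type succeeds, the raised ParseError records each attempted type under the "into_expression" key:

from sqlglot import exp, parse_one

cond = parse_one("x > 1 AND y IS NOT NULL", into=exp.Condition)
assert isinstance(cond, exp.And)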
1518 def check_errors(self) -> None: 1519 """Logs or raises any found errors, depending on the chosen error level setting.""" 1520 if self.error_level == ErrorLevel.WARN: 1521 for error in self.errors: 1522 logger.error(str(error)) 1523 elif self.error_level == ErrorLevel.RAISE and self.errors: 1524 raise ParseError( 1525 concat_messages(self.errors, self.max_errors), 1526 errors=merge_errors(self.errors), 1527 )
Logs or raises any found errors, depending on the chosen error level setting.
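Under ErrorLevel.WARN the recorded errors are logged through the module-level "sqlglot" logger instead of raised, and they remain available on the instance afterwards. A sketch, assuming the unbalanced parenthesis below trips raise_error ("Expecting )") without aborting the parse:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT (1"
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql)  # logs the error instead of raising
assert parser.errors  # the recorded ParseError objects stay inspectable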
1529 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1530 """ 1531 Appends an error in the list of recorded errors or raises it, depending on the chosen 1532 error level setting. 1533 """ 1534 token = token or self._curr or self._prev or Token.string("") 1535 start = token.start 1536 end = token.end + 1 1537 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1538 highlight = self.sql[start:end] 1539 end_context = self.sql[end : end + self.error_message_context] 1540 1541 error = ParseError.new( 1542 f"{message}. Line {token.line}, Col: {token.col}.\n" 1543 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1544 description=message, 1545 line=token.line, 1546 col=token.col, 1547 start_context=start_context, 1548 highlight=highlight, 1549 end_context=end_context, 1550 ) 1551 1552 if self.error_level == ErrorLevel.IMMEDIATE: 1553 raise error 1554 1555 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
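Because ParseError.new attaches the structured fields shown above, a caught error can be turned into custom diagnostics. A sketch (parse_one defaults to ErrorLevel.IMMEDIATE, so the malformed input raises on the spot):

from sqlglot import parse_one
from sqlglot.errors import ParseError

try:
    parse_one("SELECT (1")
except ParseError as e:
    for error in e.errors:
        print(error["line"], error["col"], error["description"], error["highlight"])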
1557 def expression( 1558 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1559 ) -> E: 1560 """ 1561 Creates a new, validated Expression. 1562 1563 Args: 1564 exp_class: The expression class to instantiate. 1565 comments: An optional list of comments to attach to the expression. 1566 kwargs: The arguments to set for the expression along with their respective values. 1567 1568 Returns: 1569 The target expression. 1570 """ 1571 instance = exp_class(**kwargs) 1572 instance.add_comments(comments) if comments else self._add_comments(instance) 1573 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
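Dialect parser hooks generally build nodes through this helper rather than instantiating expression classes directly, so that pending comments are attached and validation runs immediately. A small sketch using the public exp.column helper:

from sqlglot import exp
from sqlglot.parser import Parser

node = Parser().expression(exp.Not, this=exp.column("x"))
assert node.sql() == "NOT x"  # validated and ready for SQL generation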
1580 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1581 """ 1582 Validates an Expression, making sure that all its mandatory arguments are set. 1583 1584 Args: 1585 expression: The expression to validate. 1586 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1587 1588 Returns: 1589 The validated expression. 1590 """ 1591 if self.error_level != ErrorLevel.IGNORE: 1592 for error_message in expression.error_messages(args): 1593 self.raise_error(error_message) 1594 1595 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
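Since validation funnels through raise_error, the effect of a missing mandatory argument depends on error_level: IMMEDIATE raises, WARN and RAISE record the error, and IGNORE skips the check entirely. A sketch assuming exp.EQ's two required arguments:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

try:
    # exp.EQ requires both `this` and `expression`; omitting one fails validation.
    Parser().validate_expression(exp.EQ(this=exp.column("x")))
except ParseError as e:
    print(e)  # e.g. "Required keyword: 'expression' missing for <class ...EQ>"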