sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # The default argument order is: base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) is folded into a single LowerHex node to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) is folded into a single Hex node to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
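
# Example (illustrative, not part of sqlglot): each builder above receives the
# already-parsed argument list of a SQL function call and returns an AST node.
# For instance, VAR_MAP('a', 1, 'b', 2) alternates keys and values:
#
#     from sqlglot import exp
#
#     node = build_var_map(
#         [exp.Literal.string("a"), exp.Literal.number(1),
#          exp.Literal.string("b"), exp.Literal.number(2)]
#     )
#     assert isinstance(node, exp.VarMap)  # keys=['a', 'b'], values=[1, 2]
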

def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
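
# Example (illustrative, not part of sqlglot): the metaclass above precomputes
# tries over the space-separated SHOW/SET keys, so multi-word commands can be
# matched one token at a time:
#
#     from sqlglot.trie import TrieResult, in_trie, new_trie
#
#     trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))
#     in_trie(trie, ["SHOW"])             # (TrieResult.PREFIX, ...) - keep going
#     in_trie(trie, ["SHOW", "TABLES"])   # (TrieResult.EXISTS, ...) - full match
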

class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
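
    # Example (illustrative, not part of sqlglot): entries in FUNCTIONS rewrite a
    # call's argument list into a typed node, and NO_PAREN_FUNCTIONS lets bare
    # keywords parse as function nodes:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("SELECT CURRENT_DATE").selects[0]
    #     assert isinstance(node, exp.CurrentDate)  # no parentheses required
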
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }
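
    # Example (illustrative, not part of sqlglot): TYPE_TOKENS is what
    # _parse_types consults, so anything listed above can appear where a data
    # type is expected, e.g. as the target of a CAST:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     cast = sqlglot.parse_one("SELECT CAST(x AS DECIMAL(10, 2))").find(exp.Cast)
    #     print(cast.to)  # the parsed exp.DataType node: DECIMAL(10, 2)
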
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
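
    # Example (illustrative, not part of sqlglot): ID_VAR_TOKENS and the alias
    # token sets derived from it determine which keywords may double as names,
    # which is why a non-reserved keyword can serve as an alias:
    #
    #     import sqlglot
    #
    #     sqlglot.parse_one("SELECT 1 AS temporary")  # TEMPORARY is allowed
    #     # TABLE_ALIAS_TOKENS drops e.g. LEFT/RIGHT so joins stay parseable
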
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
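
    # Example (illustrative, not part of sqlglot): the operator tables above
    # drive a precedence climb; FACTOR binds tighter than TERM, so:
    #
    #     import sqlglot
    #
    #     tree = sqlglot.parse_one("1 + 2 * 3")
    #     # Add(this=Literal(1), expression=Mul(this=Literal(2), expression=Literal(3)))
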
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
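
    # Example (illustrative, not part of sqlglot): EXPRESSION_PARSERS backs
    # parse_into, letting callers target a specific node type instead of a full
    # statement:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("x > 1 AND y < 2", into=exp.Condition)
    #     assert isinstance(node, exp.And)
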
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
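
    # Example (illustrative, not part of sqlglot): RANGE_PARSERS fire after the
    # left operand of a predicate has been parsed, handling BETWEEN/IN/IS etc.:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     assert isinstance(sqlglot.parse_one("x BETWEEN 1 AND 10"), exp.Between)
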
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_duplicate(),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }
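
    # Example (illustrative, not part of sqlglot): PROPERTY_PARSERS and
    # CONSTRAINT_PARSERS are consulted while parsing DDL, e.g. in MySQL:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ddl = sqlglot.parse_one(
    #         "CREATE TABLE t (a INT NOT NULL) ENGINE=InnoDB", read="mysql"
    #     )
    #     assert ddl.find(exp.EngineProperty) is not None
    #     assert ddl.find(exp.NotNullColumnConstraint) is not None
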
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
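
    # Example (illustrative, not part of sqlglot): FUNCTION_PARSERS covers
    # functions whose call syntax is not a plain argument list, e.g. CAST's
    # "AS" or TRIM's positional keywords:
    #
    #     import sqlglot
    #
    #     sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM y)")  # -> exp.Trim
    #     sqlglot.parse_one("SELECT CAST(x AS INT)")            # -> exp.Cast
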
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}
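
    # Example (illustrative, not part of sqlglot): QUERY_MODIFIER_PARSERS
    # attaches trailing clauses to the query node after the main body is parsed:
    #
    #     import sqlglot
    #
    #     q = sqlglot.parse_one("SELECT a FROM t WHERE a > 0 LIMIT 10")
    #     print(q.args["where"].sql())  # WHERE a > 0
    #     print(q.args["limit"].sql())  # LIMIT 10
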
    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
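
    # Example (illustrative, not part of sqlglot): dialects tune parsing by
    # overriding these class-level flags on their Parser subclass, e.g.:
    #
    #     class MyDialectParser(Parser):  # hypothetical dialect parser
    #         LOG_DEFAULTS_TO_LN = True   # single-argument LOG(x) parses as LN(x)
    #         STRICT_CAST = False         # :: builds exp.TryCast instead of exp.Cast
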
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
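
    # Example (illustrative, not part of sqlglot): parse() is the low-level
    # entry point that sqlglot.parse wraps; one tree per statement:
    #
    #     from sqlglot.dialects import Dialect
    #
    #     dialect = Dialect.get_or_raise("duckdb")
    #     tokens = dialect.tokenizer.tokenize("SELECT 1; SELECT 2")
    #     trees = dialect.parser().parse(tokens, sql="SELECT 1; SELECT 2")
    #     assert len(trees) == 2
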
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
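
    # Example (illustrative, not part of sqlglot): with the default
    # ErrorLevel.IMMEDIATE, raise_error throws right away; the ParseError
    # carries the structured context built above:
    #
    #     import sqlglot
    #
    #     try:
    #         sqlglot.parse_one("SELECT FROM WHERE")
    #     except sqlglot.ParseError as e:
    #         print(e.errors[0]["description"])  # the plain error message
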
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
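
    # Example (illustrative, not part of sqlglot): _parse_statement dispatches on
    # STATEMENT_PARSERS, so COMMENT ON ... IS ... lands in _parse_comment:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     stmt = sqlglot.parse_one("COMMENT ON TABLE t IS 'fact table'")
    #     assert isinstance(stmt, exp.Comment)
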
1729 1730 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1731 start = self._prev 1732 temporary = self._match(TokenType.TEMPORARY) 1733 materialized = self._match_text_seq("MATERIALIZED") 1734 1735 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1736 if not kind: 1737 return self._parse_as_command(start) 1738 1739 concurrently = self._match_text_seq("CONCURRENTLY") 1740 if_exists = exists or self._parse_exists() 1741 1742 if kind == "COLUMN": 1743 this = self._parse_column() 1744 else: 1745 this = self._parse_table_parts( 1746 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1747 ) 1748 1749 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1750 1751 if self._match(TokenType.L_PAREN, advance=False): 1752 expressions = self._parse_wrapped_csv(self._parse_types) 1753 else: 1754 expressions = None 1755 1756 return self.expression( 1757 exp.Drop, 1758 exists=if_exists, 1759 this=this, 1760 expressions=expressions, 1761 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1762 temporary=temporary, 1763 materialized=materialized, 1764 cascade=self._match_text_seq("CASCADE"), 1765 constraints=self._match_text_seq("CONSTRAINTS"), 1766 purge=self._match_text_seq("PURGE"), 1767 cluster=cluster, 1768 concurrently=concurrently, 1769 ) 1770 1771 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1772 return ( 1773 self._match_text_seq("IF") 1774 and (not not_ or self._match(TokenType.NOT)) 1775 and self._match(TokenType.EXISTS) 1776 ) 1777 1778 def _parse_create(self) -> exp.Create | exp.Command: 1779 # Note: this can't be None because we've matched a statement parser 1780 start = self._prev 1781 1782 replace = ( 1783 start.token_type == TokenType.REPLACE 1784 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1785 or self._match_pair(TokenType.OR, TokenType.ALTER) 1786 ) 1787 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1788 1789 unique = self._match(TokenType.UNIQUE) 1790 1791 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1792 clustered = True 1793 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1794 "COLUMNSTORE" 1795 ): 1796 clustered = False 1797 else: 1798 clustered = None 1799 1800 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1801 self._advance() 1802 1803 properties = None 1804 create_token = self._match_set(self.CREATABLES) and self._prev 1805 1806 if not create_token: 1807 # exp.Properties.Location.POST_CREATE 1808 properties = self._parse_properties() 1809 create_token = self._match_set(self.CREATABLES) and self._prev 1810 1811 if not properties or not create_token: 1812 return self._parse_as_command(start) 1813 1814 concurrently = self._match_text_seq("CONCURRENTLY") 1815 exists = self._parse_exists(not_=True) 1816 this = None 1817 expression: t.Optional[exp.Expression] = None 1818 indexes = None 1819 no_schema_binding = None 1820 begin = None 1821 end = None 1822 clone = None 1823 1824 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1825 nonlocal properties 1826 if properties and temp_props: 1827 properties.expressions.extend(temp_props.expressions) 1828 elif temp_props: 1829 properties = temp_props 1830 1831 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1832 this = self._parse_user_defined_function(kind=create_token.token_type) 1833 1834 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1835 
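            # e.g. in a BigQuery-style CREATE FUNCTION f(x INT64) RETURNS INT64 LANGUAGE js AS 'return x',
            # the RETURNS and LANGUAGE clauses that follow the signature are consumed here as
            # properties (illustrative example; the exact clause set is dialect-specific)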
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
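            # no_schema_binding is set above for Redshift-style late-binding views
            # (CREATE VIEW ... WITH NO SCHEMA BINDING)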
begin=begin, 1938 end=end, 1939 clone=clone, 1940 concurrently=concurrently, 1941 clustered=clustered, 1942 ) 1943 1944 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1945 seq = exp.SequenceProperties() 1946 1947 options = [] 1948 index = self._index 1949 1950 while self._curr: 1951 self._match(TokenType.COMMA) 1952 if self._match_text_seq("INCREMENT"): 1953 self._match_text_seq("BY") 1954 self._match_text_seq("=") 1955 seq.set("increment", self._parse_term()) 1956 elif self._match_text_seq("MINVALUE"): 1957 seq.set("minvalue", self._parse_term()) 1958 elif self._match_text_seq("MAXVALUE"): 1959 seq.set("maxvalue", self._parse_term()) 1960 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1961 self._match_text_seq("=") 1962 seq.set("start", self._parse_term()) 1963 elif self._match_text_seq("CACHE"): 1964 # T-SQL allows empty CACHE which is initialized dynamically 1965 seq.set("cache", self._parse_number() or True) 1966 elif self._match_text_seq("OWNED", "BY"): 1967 # "OWNED BY NONE" is the default 1968 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1969 else: 1970 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1971 if opt: 1972 options.append(opt) 1973 else: 1974 break 1975 1976 seq.set("options", options if options else None) 1977 return None if self._index == index else seq 1978 1979 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1980 # only used for teradata currently 1981 self._match(TokenType.COMMA) 1982 1983 kwargs = { 1984 "no": self._match_text_seq("NO"), 1985 "dual": self._match_text_seq("DUAL"), 1986 "before": self._match_text_seq("BEFORE"), 1987 "default": self._match_text_seq("DEFAULT"), 1988 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1989 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1990 "after": self._match_text_seq("AFTER"), 1991 "minimum": self._match_texts(("MIN", "MINIMUM")), 1992 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1993 } 1994 1995 if self._match_texts(self.PROPERTY_PARSERS): 1996 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1997 try: 1998 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1999 except TypeError: 2000 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2001 2002 return None 2003 2004 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2005 return self._parse_wrapped_csv(self._parse_property) 2006 2007 def _parse_property(self) -> t.Optional[exp.Expression]: 2008 if self._match_texts(self.PROPERTY_PARSERS): 2009 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2010 2011 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2012 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2013 2014 if self._match_text_seq("COMPOUND", "SORTKEY"): 2015 return self._parse_sortkey(compound=True) 2016 2017 if self._match_text_seq("SQL", "SECURITY"): 2018 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2019 2020 index = self._index 2021 key = self._parse_column() 2022 2023 if not self._match(TokenType.EQ): 2024 self._retreat(index) 2025 return self._parse_sequence_properties() 2026 2027 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2028 if isinstance(key, exp.Column): 2029 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2030 2031 value = self._parse_bitwise() or 
self._parse_var(any_token=True) 2032 2033 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2034 if isinstance(value, exp.Column): 2035 value = exp.var(value.name) 2036 2037 return self.expression(exp.Property, this=key, value=value) 2038 2039 def _parse_stored(self) -> exp.FileFormatProperty: 2040 self._match(TokenType.ALIAS) 2041 2042 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2043 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2044 2045 return self.expression( 2046 exp.FileFormatProperty, 2047 this=( 2048 self.expression( 2049 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2050 ) 2051 if input_format or output_format 2052 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2053 ), 2054 ) 2055 2056 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2057 field = self._parse_field() 2058 if isinstance(field, exp.Identifier) and not field.quoted: 2059 field = exp.var(field) 2060 2061 return field 2062 2063 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2064 self._match(TokenType.EQ) 2065 self._match(TokenType.ALIAS) 2066 2067 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2068 2069 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2070 properties = [] 2071 while True: 2072 if before: 2073 prop = self._parse_property_before() 2074 else: 2075 prop = self._parse_property() 2076 if not prop: 2077 break 2078 for p in ensure_list(prop): 2079 properties.append(p) 2080 2081 if properties: 2082 return self.expression(exp.Properties, expressions=properties) 2083 2084 return None 2085 2086 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2087 return self.expression( 2088 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2089 ) 2090 2091 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2092 if self._match_texts(("DEFINER", "INVOKER")): 2093 security_specifier = self._prev.text.upper() 2094 return self.expression(exp.SecurityProperty, this=security_specifier) 2095 return None 2096 2097 def _parse_settings_property(self) -> exp.SettingsProperty: 2098 return self.expression( 2099 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2100 ) 2101 2102 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2103 if self._index >= 2: 2104 pre_volatile_token = self._tokens[self._index - 2] 2105 else: 2106 pre_volatile_token = None 2107 2108 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2109 return exp.VolatileProperty() 2110 2111 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2112 2113 def _parse_retention_period(self) -> exp.Var: 2114 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2115 number = self._parse_number() 2116 number_str = f"{number} " if number else "" 2117 unit = self._parse_var(any_token=True) 2118 return exp.var(f"{number_str}{unit}") 2119 2120 def _parse_system_versioning_property( 2121 self, with_: bool = False 2122 ) -> exp.WithSystemVersioningProperty: 2123 self._match(TokenType.EQ) 2124 prop = self.expression( 2125 exp.WithSystemVersioningProperty, 2126 **{ # type: ignore 2127 "on": True, 2128 "with": with_, 2129 }, 2130 ) 2131 2132 if self._match_text_seq("OFF"): 2133 prop.set("on", 
False) 2134 return prop 2135 2136 self._match(TokenType.ON) 2137 if self._match(TokenType.L_PAREN): 2138 while self._curr and not self._match(TokenType.R_PAREN): 2139 if self._match_text_seq("HISTORY_TABLE", "="): 2140 prop.set("this", self._parse_table_parts()) 2141 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2142 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2143 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2144 prop.set("retention_period", self._parse_retention_period()) 2145 2146 self._match(TokenType.COMMA) 2147 2148 return prop 2149 2150 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2151 self._match(TokenType.EQ) 2152 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2153 prop = self.expression(exp.DataDeletionProperty, on=on) 2154 2155 if self._match(TokenType.L_PAREN): 2156 while self._curr and not self._match(TokenType.R_PAREN): 2157 if self._match_text_seq("FILTER_COLUMN", "="): 2158 prop.set("filter_column", self._parse_column()) 2159 elif self._match_text_seq("RETENTION_PERIOD", "="): 2160 prop.set("retention_period", self._parse_retention_period()) 2161 2162 self._match(TokenType.COMMA) 2163 2164 return prop 2165 2166 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2167 kind = "HASH" 2168 expressions: t.Optional[t.List[exp.Expression]] = None 2169 if self._match_text_seq("BY", "HASH"): 2170 expressions = self._parse_wrapped_csv(self._parse_id_var) 2171 elif self._match_text_seq("BY", "RANDOM"): 2172 kind = "RANDOM" 2173 2174 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2175 buckets: t.Optional[exp.Expression] = None 2176 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2177 buckets = self._parse_number() 2178 2179 return self.expression( 2180 exp.DistributedByProperty, 2181 expressions=expressions, 2182 kind=kind, 2183 buckets=buckets, 2184 order=self._parse_order(), 2185 ) 2186 2187 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2188 self._match_text_seq("KEY") 2189 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2190 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2191 2192 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2193 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2194 prop = self._parse_system_versioning_property(with_=True) 2195 self._match_r_paren() 2196 return prop 2197 2198 if self._match(TokenType.L_PAREN, advance=False): 2199 return self._parse_wrapped_properties() 2200 2201 if self._match_text_seq("JOURNAL"): 2202 return self._parse_withjournaltable() 2203 2204 if self._match_texts(self.VIEW_ATTRIBUTES): 2205 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2206 2207 if self._match_text_seq("DATA"): 2208 return self._parse_withdata(no=False) 2209 elif self._match_text_seq("NO", "DATA"): 2210 return self._parse_withdata(no=True) 2211 2212 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2213 return self._parse_serde_properties(with_=True) 2214 2215 if self._match(TokenType.SCHEMA): 2216 return self.expression( 2217 exp.WithSchemaBindingProperty, 2218 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2219 ) 2220 2221 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2222 return self.expression( 2223 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2224 ) 2225 2226 if not self._next: 2227 
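            # Guard: the token stream is exhausted, so no ISOLATED LOADING (or other)
            # clause can follow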
return None 2228 2229 return self._parse_withisolatedloading() 2230 2231 def _parse_procedure_option(self) -> exp.Expression | None: 2232 if self._match_text_seq("EXECUTE", "AS"): 2233 return self.expression( 2234 exp.ExecuteAsProperty, 2235 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2236 or self._parse_string(), 2237 ) 2238 2239 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2240 2241 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2242 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2243 self._match(TokenType.EQ) 2244 2245 user = self._parse_id_var() 2246 self._match(TokenType.PARAMETER) 2247 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2248 2249 if not user or not host: 2250 return None 2251 2252 return exp.DefinerProperty(this=f"{user}@{host}") 2253 2254 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2255 self._match(TokenType.TABLE) 2256 self._match(TokenType.EQ) 2257 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2258 2259 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2260 return self.expression(exp.LogProperty, no=no) 2261 2262 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2263 return self.expression(exp.JournalProperty, **kwargs) 2264 2265 def _parse_checksum(self) -> exp.ChecksumProperty: 2266 self._match(TokenType.EQ) 2267 2268 on = None 2269 if self._match(TokenType.ON): 2270 on = True 2271 elif self._match_text_seq("OFF"): 2272 on = False 2273 2274 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2275 2276 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2277 return self.expression( 2278 exp.Cluster, 2279 expressions=( 2280 self._parse_wrapped_csv(self._parse_ordered) 2281 if wrapped 2282 else self._parse_csv(self._parse_ordered) 2283 ), 2284 ) 2285 2286 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2287 self._match_text_seq("BY") 2288 2289 self._match_l_paren() 2290 expressions = self._parse_csv(self._parse_column) 2291 self._match_r_paren() 2292 2293 if self._match_text_seq("SORTED", "BY"): 2294 self._match_l_paren() 2295 sorted_by = self._parse_csv(self._parse_ordered) 2296 self._match_r_paren() 2297 else: 2298 sorted_by = None 2299 2300 self._match(TokenType.INTO) 2301 buckets = self._parse_number() 2302 self._match_text_seq("BUCKETS") 2303 2304 return self.expression( 2305 exp.ClusteredByProperty, 2306 expressions=expressions, 2307 sorted_by=sorted_by, 2308 buckets=buckets, 2309 ) 2310 2311 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2312 if not self._match_text_seq("GRANTS"): 2313 self._retreat(self._index - 1) 2314 return None 2315 2316 return self.expression(exp.CopyGrantsProperty) 2317 2318 def _parse_freespace(self) -> exp.FreespaceProperty: 2319 self._match(TokenType.EQ) 2320 return self.expression( 2321 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2322 ) 2323 2324 def _parse_mergeblockratio( 2325 self, no: bool = False, default: bool = False 2326 ) -> exp.MergeBlockRatioProperty: 2327 if self._match(TokenType.EQ): 2328 return self.expression( 2329 exp.MergeBlockRatioProperty, 2330 this=self._parse_number(), 2331 percent=self._match(TokenType.PERCENT), 2332 ) 2333 2334 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2335 2336 def _parse_datablocksize( 2337 self, 2338 default: t.Optional[bool] = None, 2339 
minimum: t.Optional[bool] = None, 2340 maximum: t.Optional[bool] = None, 2341 ) -> exp.DataBlocksizeProperty: 2342 self._match(TokenType.EQ) 2343 size = self._parse_number() 2344 2345 units = None 2346 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2347 units = self._prev.text 2348 2349 return self.expression( 2350 exp.DataBlocksizeProperty, 2351 size=size, 2352 units=units, 2353 default=default, 2354 minimum=minimum, 2355 maximum=maximum, 2356 ) 2357 2358 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2359 self._match(TokenType.EQ) 2360 always = self._match_text_seq("ALWAYS") 2361 manual = self._match_text_seq("MANUAL") 2362 never = self._match_text_seq("NEVER") 2363 default = self._match_text_seq("DEFAULT") 2364 2365 autotemp = None 2366 if self._match_text_seq("AUTOTEMP"): 2367 autotemp = self._parse_schema() 2368 2369 return self.expression( 2370 exp.BlockCompressionProperty, 2371 always=always, 2372 manual=manual, 2373 never=never, 2374 default=default, 2375 autotemp=autotemp, 2376 ) 2377 2378 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2379 index = self._index 2380 no = self._match_text_seq("NO") 2381 concurrent = self._match_text_seq("CONCURRENT") 2382 2383 if not self._match_text_seq("ISOLATED", "LOADING"): 2384 self._retreat(index) 2385 return None 2386 2387 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2388 return self.expression( 2389 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2390 ) 2391 2392 def _parse_locking(self) -> exp.LockingProperty: 2393 if self._match(TokenType.TABLE): 2394 kind = "TABLE" 2395 elif self._match(TokenType.VIEW): 2396 kind = "VIEW" 2397 elif self._match(TokenType.ROW): 2398 kind = "ROW" 2399 elif self._match_text_seq("DATABASE"): 2400 kind = "DATABASE" 2401 else: 2402 kind = None 2403 2404 if kind in ("DATABASE", "TABLE", "VIEW"): 2405 this = self._parse_table_parts() 2406 else: 2407 this = None 2408 2409 if self._match(TokenType.FOR): 2410 for_or_in = "FOR" 2411 elif self._match(TokenType.IN): 2412 for_or_in = "IN" 2413 else: 2414 for_or_in = None 2415 2416 if self._match_text_seq("ACCESS"): 2417 lock_type = "ACCESS" 2418 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2419 lock_type = "EXCLUSIVE" 2420 elif self._match_text_seq("SHARE"): 2421 lock_type = "SHARE" 2422 elif self._match_text_seq("READ"): 2423 lock_type = "READ" 2424 elif self._match_text_seq("WRITE"): 2425 lock_type = "WRITE" 2426 elif self._match_text_seq("CHECKSUM"): 2427 lock_type = "CHECKSUM" 2428 else: 2429 lock_type = None 2430 2431 override = self._match_text_seq("OVERRIDE") 2432 2433 return self.expression( 2434 exp.LockingProperty, 2435 this=this, 2436 kind=kind, 2437 for_or_in=for_or_in, 2438 lock_type=lock_type, 2439 override=override, 2440 ) 2441 2442 def _parse_partition_by(self) -> t.List[exp.Expression]: 2443 if self._match(TokenType.PARTITION_BY): 2444 return self._parse_csv(self._parse_assignment) 2445 return [] 2446 2447 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2448 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2449 if self._match_text_seq("MINVALUE"): 2450 return exp.var("MINVALUE") 2451 if self._match_text_seq("MAXVALUE"): 2452 return exp.var("MAXVALUE") 2453 return self._parse_bitwise() 2454 2455 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2456 expression = None 2457 from_expressions = None 2458 to_expressions = None 2459 2460 if self._match(TokenType.IN): 2461 this = 
self._parse_wrapped_csv(self._parse_bitwise) 2462 elif self._match(TokenType.FROM): 2463 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2464 self._match_text_seq("TO") 2465 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2466 elif self._match_text_seq("WITH", "(", "MODULUS"): 2467 this = self._parse_number() 2468 self._match_text_seq(",", "REMAINDER") 2469 expression = self._parse_number() 2470 self._match_r_paren() 2471 else: 2472 self.raise_error("Failed to parse partition bound spec.") 2473 2474 return self.expression( 2475 exp.PartitionBoundSpec, 2476 this=this, 2477 expression=expression, 2478 from_expressions=from_expressions, 2479 to_expressions=to_expressions, 2480 ) 2481 2482 # https://www.postgresql.org/docs/current/sql-createtable.html 2483 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2484 if not self._match_text_seq("OF"): 2485 self._retreat(self._index - 1) 2486 return None 2487 2488 this = self._parse_table(schema=True) 2489 2490 if self._match(TokenType.DEFAULT): 2491 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2492 elif self._match_text_seq("FOR", "VALUES"): 2493 expression = self._parse_partition_bound_spec() 2494 else: 2495 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2496 2497 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2498 2499 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2500 self._match(TokenType.EQ) 2501 return self.expression( 2502 exp.PartitionedByProperty, 2503 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2504 ) 2505 2506 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2507 if self._match_text_seq("AND", "STATISTICS"): 2508 statistics = True 2509 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2510 statistics = False 2511 else: 2512 statistics = None 2513 2514 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2515 2516 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2517 if self._match_text_seq("SQL"): 2518 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2519 return None 2520 2521 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2522 if self._match_text_seq("SQL", "DATA"): 2523 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2524 return None 2525 2526 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2527 if self._match_text_seq("PRIMARY", "INDEX"): 2528 return exp.NoPrimaryIndexProperty() 2529 if self._match_text_seq("SQL"): 2530 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2531 return None 2532 2533 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2534 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2535 return exp.OnCommitProperty() 2536 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2537 return exp.OnCommitProperty(delete=True) 2538 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2539 2540 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2541 if self._match_text_seq("SQL", "DATA"): 2542 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2543 return None 2544 2545 def _parse_distkey(self) -> exp.DistKeyProperty: 2546 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2547 2548 def _parse_create_like(self) -> 
t.Optional[exp.LikeProperty]: 2549 table = self._parse_table(schema=True) 2550 2551 options = [] 2552 while self._match_texts(("INCLUDING", "EXCLUDING")): 2553 this = self._prev.text.upper() 2554 2555 id_var = self._parse_id_var() 2556 if not id_var: 2557 return None 2558 2559 options.append( 2560 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2561 ) 2562 2563 return self.expression(exp.LikeProperty, this=table, expressions=options) 2564 2565 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2566 return self.expression( 2567 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2568 ) 2569 2570 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2571 self._match(TokenType.EQ) 2572 return self.expression( 2573 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2574 ) 2575 2576 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2577 self._match_text_seq("WITH", "CONNECTION") 2578 return self.expression( 2579 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2580 ) 2581 2582 def _parse_returns(self) -> exp.ReturnsProperty: 2583 value: t.Optional[exp.Expression] 2584 null = None 2585 is_table = self._match(TokenType.TABLE) 2586 2587 if is_table: 2588 if self._match(TokenType.LT): 2589 value = self.expression( 2590 exp.Schema, 2591 this="TABLE", 2592 expressions=self._parse_csv(self._parse_struct_types), 2593 ) 2594 if not self._match(TokenType.GT): 2595 self.raise_error("Expecting >") 2596 else: 2597 value = self._parse_schema(exp.var("TABLE")) 2598 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2599 null = True 2600 value = None 2601 else: 2602 value = self._parse_types() 2603 2604 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2605 2606 def _parse_describe(self) -> exp.Describe: 2607 kind = self._match_set(self.CREATABLES) and self._prev.text 2608 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2609 if self._match(TokenType.DOT): 2610 style = None 2611 self._retreat(self._index - 2) 2612 this = self._parse_table(schema=True) 2613 properties = self._parse_properties() 2614 expressions = properties.expressions if properties else None 2615 partition = self._parse_partition() 2616 return self.expression( 2617 exp.Describe, 2618 this=this, 2619 style=style, 2620 kind=kind, 2621 expressions=expressions, 2622 partition=partition, 2623 ) 2624 2625 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2626 kind = self._prev.text.upper() 2627 expressions = [] 2628 2629 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2630 if self._match(TokenType.WHEN): 2631 expression = self._parse_disjunction() 2632 self._match(TokenType.THEN) 2633 else: 2634 expression = None 2635 2636 else_ = self._match(TokenType.ELSE) 2637 2638 if not self._match(TokenType.INTO): 2639 return None 2640 2641 return self.expression( 2642 exp.ConditionalInsert, 2643 this=self.expression( 2644 exp.Insert, 2645 this=self._parse_table(schema=True), 2646 expression=self._parse_derived_table_values(), 2647 ), 2648 expression=expression, 2649 else_=else_, 2650 ) 2651 2652 expression = parse_conditional_insert() 2653 while expression is not None: 2654 expressions.append(expression) 2655 expression = parse_conditional_insert() 2656 2657 return self.expression( 2658 exp.MultitableInserts, 2659 kind=kind, 2660 
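            # kind is "ALL" or "FIRST", mirroring Oracle's INSERT ALL / INSERT FIRST syntax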
comments=comments, 2661 expressions=expressions, 2662 source=self._parse_table(), 2663 ) 2664 2665 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2666 comments = [] 2667 hint = self._parse_hint() 2668 overwrite = self._match(TokenType.OVERWRITE) 2669 ignore = self._match(TokenType.IGNORE) 2670 local = self._match_text_seq("LOCAL") 2671 alternative = None 2672 is_function = None 2673 2674 if self._match_text_seq("DIRECTORY"): 2675 this: t.Optional[exp.Expression] = self.expression( 2676 exp.Directory, 2677 this=self._parse_var_or_string(), 2678 local=local, 2679 row_format=self._parse_row_format(match_row=True), 2680 ) 2681 else: 2682 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2683 comments += ensure_list(self._prev_comments) 2684 return self._parse_multitable_inserts(comments) 2685 2686 if self._match(TokenType.OR): 2687 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2688 2689 self._match(TokenType.INTO) 2690 comments += ensure_list(self._prev_comments) 2691 self._match(TokenType.TABLE) 2692 is_function = self._match(TokenType.FUNCTION) 2693 2694 this = ( 2695 self._parse_table(schema=True, parse_partition=True) 2696 if not is_function 2697 else self._parse_function() 2698 ) 2699 2700 returning = self._parse_returning() 2701 2702 return self.expression( 2703 exp.Insert, 2704 comments=comments, 2705 hint=hint, 2706 is_function=is_function, 2707 this=this, 2708 stored=self._match_text_seq("STORED") and self._parse_stored(), 2709 by_name=self._match_text_seq("BY", "NAME"), 2710 exists=self._parse_exists(), 2711 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2712 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2713 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2714 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2715 conflict=self._parse_on_conflict(), 2716 returning=returning or self._parse_returning(), 2717 overwrite=overwrite, 2718 alternative=alternative, 2719 ignore=ignore, 2720 source=self._match(TokenType.TABLE) and self._parse_table(), 2721 ) 2722 2723 def _parse_kill(self) -> exp.Kill: 2724 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2725 2726 return self.expression( 2727 exp.Kill, 2728 this=self._parse_primary(), 2729 kind=kind, 2730 ) 2731 2732 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2733 conflict = self._match_text_seq("ON", "CONFLICT") 2734 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2735 2736 if not conflict and not duplicate: 2737 return None 2738 2739 conflict_keys = None 2740 constraint = None 2741 2742 if conflict: 2743 if self._match_text_seq("ON", "CONSTRAINT"): 2744 constraint = self._parse_id_var() 2745 elif self._match(TokenType.L_PAREN): 2746 conflict_keys = self._parse_csv(self._parse_id_var) 2747 self._match_r_paren() 2748 2749 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2750 if self._prev.token_type == TokenType.UPDATE: 2751 self._match(TokenType.SET) 2752 expressions = self._parse_csv(self._parse_equality) 2753 else: 2754 expressions = None 2755 2756 return self.expression( 2757 exp.OnConflict, 2758 duplicate=duplicate, 2759 expressions=expressions, 2760 action=action, 2761 conflict_keys=conflict_keys, 2762 constraint=constraint, 2763 ) 2764 2765 def _parse_returning(self) -> t.Optional[exp.Returning]: 2766 if not self._match(TokenType.RETURNING): 2767 return None 2768 return 
self.expression( 2769 exp.Returning, 2770 expressions=self._parse_csv(self._parse_expression), 2771 into=self._match(TokenType.INTO) and self._parse_table_part(), 2772 ) 2773 2774 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2775 if not self._match(TokenType.FORMAT): 2776 return None 2777 return self._parse_row_format() 2778 2779 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2780 index = self._index 2781 with_ = with_ or self._match_text_seq("WITH") 2782 2783 if not self._match(TokenType.SERDE_PROPERTIES): 2784 self._retreat(index) 2785 return None 2786 return self.expression( 2787 exp.SerdeProperties, 2788 **{ # type: ignore 2789 "expressions": self._parse_wrapped_properties(), 2790 "with": with_, 2791 }, 2792 ) 2793 2794 def _parse_row_format( 2795 self, match_row: bool = False 2796 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2797 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2798 return None 2799 2800 if self._match_text_seq("SERDE"): 2801 this = self._parse_string() 2802 2803 serde_properties = self._parse_serde_properties() 2804 2805 return self.expression( 2806 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2807 ) 2808 2809 self._match_text_seq("DELIMITED") 2810 2811 kwargs = {} 2812 2813 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2814 kwargs["fields"] = self._parse_string() 2815 if self._match_text_seq("ESCAPED", "BY"): 2816 kwargs["escaped"] = self._parse_string() 2817 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2818 kwargs["collection_items"] = self._parse_string() 2819 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2820 kwargs["map_keys"] = self._parse_string() 2821 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2822 kwargs["lines"] = self._parse_string() 2823 if self._match_text_seq("NULL", "DEFINED", "AS"): 2824 kwargs["null"] = self._parse_string() 2825 2826 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2827 2828 def _parse_load(self) -> exp.LoadData | exp.Command: 2829 if self._match_text_seq("DATA"): 2830 local = self._match_text_seq("LOCAL") 2831 self._match_text_seq("INPATH") 2832 inpath = self._parse_string() 2833 overwrite = self._match(TokenType.OVERWRITE) 2834 self._match_pair(TokenType.INTO, TokenType.TABLE) 2835 2836 return self.expression( 2837 exp.LoadData, 2838 this=self._parse_table(schema=True), 2839 local=local, 2840 overwrite=overwrite, 2841 inpath=inpath, 2842 partition=self._parse_partition(), 2843 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2844 serde=self._match_text_seq("SERDE") and self._parse_string(), 2845 ) 2846 return self._parse_as_command(self._prev) 2847 2848 def _parse_delete(self) -> exp.Delete: 2849 # This handles MySQL's "Multiple-Table Syntax" 2850 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2851 tables = None 2852 if not self._match(TokenType.FROM, advance=False): 2853 tables = self._parse_csv(self._parse_table) or None 2854 2855 returning = self._parse_returning() 2856 2857 return self.expression( 2858 exp.Delete, 2859 tables=tables, 2860 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2861 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2862 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2863 where=self._parse_where(), 2864 returning=returning or self._parse_returning(), 
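            # RETURNING is attempted both before FROM (above) and again here, since
            # dialects differ on where the clause may appear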
2865 limit=self._parse_limit(), 2866 ) 2867 2868 def _parse_update(self) -> exp.Update: 2869 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2870 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2871 returning = self._parse_returning() 2872 return self.expression( 2873 exp.Update, 2874 **{ # type: ignore 2875 "this": this, 2876 "expressions": expressions, 2877 "from": self._parse_from(joins=True), 2878 "where": self._parse_where(), 2879 "returning": returning or self._parse_returning(), 2880 "order": self._parse_order(), 2881 "limit": self._parse_limit(), 2882 }, 2883 ) 2884 2885 def _parse_uncache(self) -> exp.Uncache: 2886 if not self._match(TokenType.TABLE): 2887 self.raise_error("Expecting TABLE after UNCACHE") 2888 2889 return self.expression( 2890 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2891 ) 2892 2893 def _parse_cache(self) -> exp.Cache: 2894 lazy = self._match_text_seq("LAZY") 2895 self._match(TokenType.TABLE) 2896 table = self._parse_table(schema=True) 2897 2898 options = [] 2899 if self._match_text_seq("OPTIONS"): 2900 self._match_l_paren() 2901 k = self._parse_string() 2902 self._match(TokenType.EQ) 2903 v = self._parse_string() 2904 options = [k, v] 2905 self._match_r_paren() 2906 2907 self._match(TokenType.ALIAS) 2908 return self.expression( 2909 exp.Cache, 2910 this=table, 2911 lazy=lazy, 2912 options=options, 2913 expression=self._parse_select(nested=True), 2914 ) 2915 2916 def _parse_partition(self) -> t.Optional[exp.Partition]: 2917 if not self._match(TokenType.PARTITION): 2918 return None 2919 2920 return self.expression( 2921 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2922 ) 2923 2924 def _parse_value(self) -> t.Optional[exp.Tuple]: 2925 if self._match(TokenType.L_PAREN): 2926 expressions = self._parse_csv(self._parse_expression) 2927 self._match_r_paren() 2928 return self.expression(exp.Tuple, expressions=expressions) 2929 2930 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
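        # Each bare expression then becomes its own single-element Tuple, i.e. one row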
2931 expression = self._parse_expression() 2932 if expression: 2933 return self.expression(exp.Tuple, expressions=[expression]) 2934 return None 2935 2936 def _parse_projections(self) -> t.List[exp.Expression]: 2937 return self._parse_expressions() 2938 2939 def _parse_select( 2940 self, 2941 nested: bool = False, 2942 table: bool = False, 2943 parse_subquery_alias: bool = True, 2944 parse_set_operation: bool = True, 2945 ) -> t.Optional[exp.Expression]: 2946 cte = self._parse_with() 2947 2948 if cte: 2949 this = self._parse_statement() 2950 2951 if not this: 2952 self.raise_error("Failed to parse any statement following CTE") 2953 return cte 2954 2955 if "with" in this.arg_types: 2956 this.set("with", cte) 2957 else: 2958 self.raise_error(f"{this.key} does not support CTE") 2959 this = cte 2960 2961 return this 2962 2963 # duckdb supports leading with FROM x 2964 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2965 2966 if self._match(TokenType.SELECT): 2967 comments = self._prev_comments 2968 2969 hint = self._parse_hint() 2970 2971 if self._next and not self._next.token_type == TokenType.DOT: 2972 all_ = self._match(TokenType.ALL) 2973 distinct = self._match_set(self.DISTINCT_TOKENS) 2974 else: 2975 all_, distinct = None, None 2976 2977 kind = ( 2978 self._match(TokenType.ALIAS) 2979 and self._match_texts(("STRUCT", "VALUE")) 2980 and self._prev.text.upper() 2981 ) 2982 2983 if distinct: 2984 distinct = self.expression( 2985 exp.Distinct, 2986 on=self._parse_value() if self._match(TokenType.ON) else None, 2987 ) 2988 2989 if all_ and distinct: 2990 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2991 2992 operation_modifiers = [] 2993 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 2994 operation_modifiers.append(exp.var(self._prev.text.upper())) 2995 2996 limit = self._parse_limit(top=True) 2997 projections = self._parse_projections() 2998 2999 this = self.expression( 3000 exp.Select, 3001 kind=kind, 3002 hint=hint, 3003 distinct=distinct, 3004 expressions=projections, 3005 limit=limit, 3006 operation_modifiers=operation_modifiers or None, 3007 ) 3008 this.comments = comments 3009 3010 into = self._parse_into() 3011 if into: 3012 this.set("into", into) 3013 3014 if not from_: 3015 from_ = self._parse_from() 3016 3017 if from_: 3018 this.set("from", from_) 3019 3020 this = self._parse_query_modifiers(this) 3021 elif (table or nested) and self._match(TokenType.L_PAREN): 3022 if self._match(TokenType.PIVOT): 3023 this = self._parse_simplified_pivot() 3024 elif self._match(TokenType.FROM): 3025 this = exp.select("*").from_( 3026 t.cast(exp.From, self._parse_from(skip_from_token=True)) 3027 ) 3028 else: 3029 this = ( 3030 self._parse_table() 3031 if table 3032 else self._parse_select(nested=True, parse_set_operation=False) 3033 ) 3034 3035 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3036 # in case a modifier (e.g. 
join) is following 3037 if table and isinstance(this, exp.Values) and this.alias: 3038 alias = this.args["alias"].pop() 3039 this = exp.Table(this=this, alias=alias) 3040 3041 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3042 3043 self._match_r_paren() 3044 3045 # We return early here so that the UNION isn't attached to the subquery by the 3046 # following call to _parse_set_operations, but instead becomes the parent node 3047 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3048 elif self._match(TokenType.VALUES, advance=False): 3049 this = self._parse_derived_table_values() 3050 elif from_: 3051 this = exp.select("*").from_(from_.this, copy=False) 3052 elif self._match(TokenType.SUMMARIZE): 3053 table = self._match(TokenType.TABLE) 3054 this = self._parse_select() or self._parse_string() or self._parse_table() 3055 return self.expression(exp.Summarize, this=this, table=table) 3056 elif self._match(TokenType.DESCRIBE): 3057 this = self._parse_describe() 3058 elif self._match_text_seq("STREAM"): 3059 this = self._parse_function() 3060 if this: 3061 this = self.expression(exp.Stream, this=this) 3062 else: 3063 self._retreat(self._index - 1) 3064 else: 3065 this = None 3066 3067 return self._parse_set_operations(this) if parse_set_operation else this 3068 3069 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3070 if not skip_with_token and not self._match(TokenType.WITH): 3071 return None 3072 3073 comments = self._prev_comments 3074 recursive = self._match(TokenType.RECURSIVE) 3075 3076 last_comments = None 3077 expressions = [] 3078 while True: 3079 expressions.append(self._parse_cte()) 3080 if last_comments: 3081 expressions[-1].add_comments(last_comments) 3082 3083 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3084 break 3085 else: 3086 self._match(TokenType.WITH) 3087 3088 last_comments = self._prev_comments 3089 3090 return self.expression( 3091 exp.With, comments=comments, expressions=expressions, recursive=recursive 3092 ) 3093 3094 def _parse_cte(self) -> exp.CTE: 3095 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3096 if not alias or not alias.this: 3097 self.raise_error("Expected CTE to have alias") 3098 3099 self._match(TokenType.ALIAS) 3100 comments = self._prev_comments 3101 3102 if self._match_text_seq("NOT", "MATERIALIZED"): 3103 materialized = False 3104 elif self._match_text_seq("MATERIALIZED"): 3105 materialized = True 3106 else: 3107 materialized = None 3108 3109 return self.expression( 3110 exp.CTE, 3111 this=self._parse_wrapped(self._parse_statement), 3112 alias=alias, 3113 materialized=materialized, 3114 comments=comments, 3115 ) 3116 3117 def _parse_table_alias( 3118 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3119 ) -> t.Optional[exp.TableAlias]: 3120 any_token = self._match(TokenType.ALIAS) 3121 alias = ( 3122 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3123 or self._parse_string_as_identifier() 3124 ) 3125 3126 index = self._index 3127 if self._match(TokenType.L_PAREN): 3128 columns = self._parse_csv(self._parse_function_parameter) 3129 self._match_r_paren() if columns else self._retreat(index) 3130 else: 3131 columns = None 3132 3133 if not alias and not columns: 3134 return None 3135 3136 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3137 3138 # We bubble up comments from the Identifier to the TableAlias 3139 if isinstance(alias, exp.Identifier): 3140 
table_alias.add_comments(alias.pop_comments()) 3141 3142 return table_alias 3143 3144 def _parse_subquery( 3145 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3146 ) -> t.Optional[exp.Subquery]: 3147 if not this: 3148 return None 3149 3150 return self.expression( 3151 exp.Subquery, 3152 this=this, 3153 pivots=self._parse_pivots(), 3154 alias=self._parse_table_alias() if parse_alias else None, 3155 sample=self._parse_table_sample(), 3156 ) 3157 3158 def _implicit_unnests_to_explicit(self, this: E) -> E: 3159 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3160 3161 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3162 for i, join in enumerate(this.args.get("joins") or []): 3163 table = join.this 3164 normalized_table = table.copy() 3165 normalized_table.meta["maybe_column"] = True 3166 normalized_table = _norm(normalized_table, dialect=self.dialect) 3167 3168 if isinstance(table, exp.Table) and not join.args.get("on"): 3169 if normalized_table.parts[0].name in refs: 3170 table_as_column = table.to_column() 3171 unnest = exp.Unnest(expressions=[table_as_column]) 3172 3173 # Table.to_column creates a parent Alias node that we want to convert to 3174 # a TableAlias and attach to the Unnest, so it matches the parser's output 3175 if isinstance(table.args.get("alias"), exp.TableAlias): 3176 table_as_column.replace(table_as_column.this) 3177 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3178 3179 table.replace(unnest) 3180 3181 refs.add(normalized_table.alias_or_name) 3182 3183 return this 3184 3185 def _parse_query_modifiers( 3186 self, this: t.Optional[exp.Expression] 3187 ) -> t.Optional[exp.Expression]: 3188 if isinstance(this, (exp.Query, exp.Table)): 3189 for join in self._parse_joins(): 3190 this.append("joins", join) 3191 for lateral in iter(self._parse_lateral, None): 3192 this.append("laterals", lateral) 3193 3194 while True: 3195 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3196 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3197 key, expression = parser(self) 3198 3199 if expression: 3200 this.set(key, expression) 3201 if key == "limit": 3202 offset = expression.args.pop("offset", None) 3203 3204 if offset: 3205 offset = exp.Offset(expression=offset) 3206 this.set("offset", offset) 3207 3208 limit_by_expressions = expression.expressions 3209 expression.set("expressions", None) 3210 offset.set("expressions", limit_by_expressions) 3211 continue 3212 break 3213 3214 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3215 this = self._implicit_unnests_to_explicit(this) 3216 3217 return this 3218 3219 def _parse_hint(self) -> t.Optional[exp.Hint]: 3220 if self._match(TokenType.HINT): 3221 hints = [] 3222 for hint in iter( 3223 lambda: self._parse_csv( 3224 lambda: self._parse_function() or self._parse_var(upper=True) 3225 ), 3226 [], 3227 ): 3228 hints.extend(hint) 3229 3230 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3231 self.raise_error("Expected */ after HINT") 3232 3233 return self.expression(exp.Hint, expressions=hints) 3234 3235 return None 3236 3237 def _parse_into(self) -> t.Optional[exp.Into]: 3238 if not self._match(TokenType.INTO): 3239 return None 3240 3241 temp = self._match(TokenType.TEMPORARY) 3242 unlogged = self._match_text_seq("UNLOGGED") 3243 self._match(TokenType.TABLE) 3244 3245 return self.expression( 3246 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3247 ) 
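    # A minimal sketch (assuming the "tsql" dialect): a SELECT ... INTO target parsed by
    # _parse_into above ends up in the Select's "into" arg:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT * INTO #tmp FROM t", read="tsql")
    #   assert ast.args["into"] is not None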
3248 3249 def _parse_from( 3250 self, joins: bool = False, skip_from_token: bool = False 3251 ) -> t.Optional[exp.From]: 3252 if not skip_from_token and not self._match(TokenType.FROM): 3253 return None 3254 3255 return self.expression( 3256 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3257 ) 3258 3259 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3260 return self.expression( 3261 exp.MatchRecognizeMeasure, 3262 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3263 this=self._parse_expression(), 3264 ) 3265 3266 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3267 if not self._match(TokenType.MATCH_RECOGNIZE): 3268 return None 3269 3270 self._match_l_paren() 3271 3272 partition = self._parse_partition_by() 3273 order = self._parse_order() 3274 3275 measures = ( 3276 self._parse_csv(self._parse_match_recognize_measure) 3277 if self._match_text_seq("MEASURES") 3278 else None 3279 ) 3280 3281 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3282 rows = exp.var("ONE ROW PER MATCH") 3283 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3284 text = "ALL ROWS PER MATCH" 3285 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3286 text += " SHOW EMPTY MATCHES" 3287 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3288 text += " OMIT EMPTY MATCHES" 3289 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3290 text += " WITH UNMATCHED ROWS" 3291 rows = exp.var(text) 3292 else: 3293 rows = None 3294 3295 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3296 text = "AFTER MATCH SKIP" 3297 if self._match_text_seq("PAST", "LAST", "ROW"): 3298 text += " PAST LAST ROW" 3299 elif self._match_text_seq("TO", "NEXT", "ROW"): 3300 text += " TO NEXT ROW" 3301 elif self._match_text_seq("TO", "FIRST"): 3302 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3303 elif self._match_text_seq("TO", "LAST"): 3304 text += f" TO LAST {self._advance_any().text}" # type: ignore 3305 after = exp.var(text) 3306 else: 3307 after = None 3308 3309 if self._match_text_seq("PATTERN"): 3310 self._match_l_paren() 3311 3312 if not self._curr: 3313 self.raise_error("Expecting )", self._curr) 3314 3315 paren = 1 3316 start = self._curr 3317 3318 while self._curr and paren > 0: 3319 if self._curr.token_type == TokenType.L_PAREN: 3320 paren += 1 3321 if self._curr.token_type == TokenType.R_PAREN: 3322 paren -= 1 3323 3324 end = self._prev 3325 self._advance() 3326 3327 if paren > 0: 3328 self.raise_error("Expecting )", self._curr) 3329 3330 pattern = exp.var(self._find_sql(start, end)) 3331 else: 3332 pattern = None 3333 3334 define = ( 3335 self._parse_csv(self._parse_name_as_expression) 3336 if self._match_text_seq("DEFINE") 3337 else None 3338 ) 3339 3340 self._match_r_paren() 3341 3342 return self.expression( 3343 exp.MatchRecognize, 3344 partition_by=partition, 3345 order=order, 3346 measures=measures, 3347 rows=rows, 3348 after=after, 3349 pattern=pattern, 3350 define=define, 3351 alias=self._parse_table_alias(), 3352 ) 3353 3354 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3355 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3356 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3357 cross_apply = False 3358 3359 if cross_apply is not None: 3360 this = self._parse_select(table=True) 3361 view = None 3362 outer = None 3363 elif self._match(TokenType.LATERAL): 3364 this = self._parse_select(table=True) 3365 view = 
self._match(TokenType.VIEW) 3366 outer = self._match(TokenType.OUTER) 3367 else: 3368 return None 3369 3370 if not this: 3371 this = ( 3372 self._parse_unnest() 3373 or self._parse_function() 3374 or self._parse_id_var(any_token=False) 3375 ) 3376 3377 while self._match(TokenType.DOT): 3378 this = exp.Dot( 3379 this=this, 3380 expression=self._parse_function() or self._parse_id_var(any_token=False), 3381 ) 3382 3383 if view: 3384 table = self._parse_id_var(any_token=False) 3385 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3386 table_alias: t.Optional[exp.TableAlias] = self.expression( 3387 exp.TableAlias, this=table, columns=columns 3388 ) 3389 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3390 # We move the alias from the lateral's child node to the lateral itself 3391 table_alias = this.args["alias"].pop() 3392 else: 3393 table_alias = self._parse_table_alias() 3394 3395 return self.expression( 3396 exp.Lateral, 3397 this=this, 3398 view=view, 3399 outer=outer, 3400 alias=table_alias, 3401 cross_apply=cross_apply, 3402 ) 3403 3404 def _parse_join_parts( 3405 self, 3406 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3407 return ( 3408 self._match_set(self.JOIN_METHODS) and self._prev, 3409 self._match_set(self.JOIN_SIDES) and self._prev, 3410 self._match_set(self.JOIN_KINDS) and self._prev, 3411 ) 3412 3413 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3414 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3415 this = self._parse_column() 3416 if isinstance(this, exp.Column): 3417 return this.this 3418 return this 3419 3420 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3421 3422 def _parse_join( 3423 self, skip_join_token: bool = False, parse_bracket: bool = False 3424 ) -> t.Optional[exp.Join]: 3425 if self._match(TokenType.COMMA): 3426 return self.expression(exp.Join, this=self._parse_table()) 3427 3428 index = self._index 3429 method, side, kind = self._parse_join_parts() 3430 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3431 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3432 3433 if not skip_join_token and not join: 3434 self._retreat(index) 3435 kind = None 3436 method = None 3437 side = None 3438 3439 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3440 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3441 3442 if not skip_join_token and not join and not outer_apply and not cross_apply: 3443 return None 3444 3445 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3446 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3447 kwargs["expressions"] = self._parse_csv( 3448 lambda: self._parse_table(parse_bracket=parse_bracket) 3449 ) 3450 3451 if method: 3452 kwargs["method"] = method.text 3453 if side: 3454 kwargs["side"] = side.text 3455 if kind: 3456 kwargs["kind"] = kind.text 3457 if hint: 3458 kwargs["hint"] = hint 3459 3460 if self._match(TokenType.MATCH_CONDITION): 3461 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3462 3463 if self._match(TokenType.ON): 3464 kwargs["on"] = self._parse_assignment() 3465 elif self._match(TokenType.USING): 3466 kwargs["using"] = self._parse_using_identifiers() 3467 elif ( 3468 not (outer_apply or cross_apply) 3469 and not isinstance(kwargs["this"], exp.Unnest) 3470 and not (kind and kind.token_type in 
(TokenType.CROSS, TokenType.ARRAY)) 3471 ): 3472 index = self._index 3473 joins: t.Optional[list] = list(self._parse_joins()) 3474 3475 if joins and self._match(TokenType.ON): 3476 kwargs["on"] = self._parse_assignment() 3477 elif joins and self._match(TokenType.USING): 3478 kwargs["using"] = self._parse_using_identifiers() 3479 else: 3480 joins = None 3481 self._retreat(index) 3482 3483 kwargs["this"].set("joins", joins if joins else None) 3484 3485 comments = [c for token in (method, side, kind) if token for c in token.comments] 3486 return self.expression(exp.Join, comments=comments, **kwargs) 3487 3488 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3489 this = self._parse_assignment() 3490 3491 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3492 return this 3493 3494 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3495 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3496 3497 return this 3498 3499 def _parse_index_params(self) -> exp.IndexParameters: 3500 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3501 3502 if self._match(TokenType.L_PAREN, advance=False): 3503 columns = self._parse_wrapped_csv(self._parse_with_operator) 3504 else: 3505 columns = None 3506 3507 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3508 partition_by = self._parse_partition_by() 3509 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3510 tablespace = ( 3511 self._parse_var(any_token=True) 3512 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3513 else None 3514 ) 3515 where = self._parse_where() 3516 3517 on = self._parse_field() if self._match(TokenType.ON) else None 3518 3519 return self.expression( 3520 exp.IndexParameters, 3521 using=using, 3522 columns=columns, 3523 include=include, 3524 partition_by=partition_by, 3525 where=where, 3526 with_storage=with_storage, 3527 tablespace=tablespace, 3528 on=on, 3529 ) 3530 3531 def _parse_index( 3532 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3533 ) -> t.Optional[exp.Index]: 3534 if index or anonymous: 3535 unique = None 3536 primary = None 3537 amp = None 3538 3539 self._match(TokenType.ON) 3540 self._match(TokenType.TABLE) # hive 3541 table = self._parse_table_parts(schema=True) 3542 else: 3543 unique = self._match(TokenType.UNIQUE) 3544 primary = self._match_text_seq("PRIMARY") 3545 amp = self._match_text_seq("AMP") 3546 3547 if not self._match(TokenType.INDEX): 3548 return None 3549 3550 index = self._parse_id_var() 3551 table = None 3552 3553 params = self._parse_index_params() 3554 3555 return self.expression( 3556 exp.Index, 3557 this=index, 3558 table=table, 3559 unique=unique, 3560 primary=primary, 3561 amp=amp, 3562 params=params, 3563 ) 3564 3565 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3566 hints: t.List[exp.Expression] = [] 3567 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3568 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3569 hints.append( 3570 self.expression( 3571 exp.WithTableHint, 3572 expressions=self._parse_csv( 3573 lambda: self._parse_function() or self._parse_var(any_token=True) 3574 ), 3575 ) 3576 ) 3577 self._match_r_paren() 3578 else: 3579 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3580 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3581 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3582 3583 
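                # Illustrative sketch, not part of this module (assumes the public
                # sqlglot API): per the MySQL docs linked above, a hint such as
                #
                #   import sqlglot
                #   ast = sqlglot.parse_one(
                #       "SELECT * FROM t USE INDEX FOR JOIN (i1, i2)", read="mysql"
                #   )
                #
                # is expected to land here as an exp.IndexTableHint with
                # this='USE', target='JOIN' and expressions=[i1, i2].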
self._match_set((TokenType.INDEX, TokenType.KEY)) 3584 if self._match(TokenType.FOR): 3585 hint.set("target", self._advance_any() and self._prev.text.upper()) 3586 3587 hint.set("expressions", self._parse_wrapped_id_vars()) 3588 hints.append(hint) 3589 3590 return hints or None 3591 3592 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3593 return ( 3594 (not schema and self._parse_function(optional_parens=False)) 3595 or self._parse_id_var(any_token=False) 3596 or self._parse_string_as_identifier() 3597 or self._parse_placeholder() 3598 ) 3599 3600 def _parse_table_parts( 3601 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3602 ) -> exp.Table: 3603 catalog = None 3604 db = None 3605 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3606 3607 while self._match(TokenType.DOT): 3608 if catalog: 3609 # This allows nesting the table in arbitrarily many dot expressions if needed 3610 table = self.expression( 3611 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3612 ) 3613 else: 3614 catalog = db 3615 db = table 3616 # "" used for tsql FROM a..b case 3617 table = self._parse_table_part(schema=schema) or "" 3618 3619 if ( 3620 wildcard 3621 and self._is_connected() 3622 and (isinstance(table, exp.Identifier) or not table) 3623 and self._match(TokenType.STAR) 3624 ): 3625 if isinstance(table, exp.Identifier): 3626 table.args["this"] += "*" 3627 else: 3628 table = exp.Identifier(this="*") 3629 3630 # We bubble up comments from the Identifier to the Table 3631 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3632 3633 if is_db_reference: 3634 catalog = db 3635 db = table 3636 table = None 3637 3638 if not table and not is_db_reference: 3639 self.raise_error(f"Expected table name but got {self._curr}") 3640 if not db and is_db_reference: 3641 self.raise_error(f"Expected database name but got {self._curr}") 3642 3643 table = self.expression( 3644 exp.Table, 3645 comments=comments, 3646 this=table, 3647 db=db, 3648 catalog=catalog, 3649 ) 3650 3651 changes = self._parse_changes() 3652 if changes: 3653 table.set("changes", changes) 3654 3655 at_before = self._parse_historical_data() 3656 if at_before: 3657 table.set("when", at_before) 3658 3659 pivots = self._parse_pivots() 3660 if pivots: 3661 table.set("pivots", pivots) 3662 3663 return table 3664 3665 def _parse_table( 3666 self, 3667 schema: bool = False, 3668 joins: bool = False, 3669 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3670 parse_bracket: bool = False, 3671 is_db_reference: bool = False, 3672 parse_partition: bool = False, 3673 ) -> t.Optional[exp.Expression]: 3674 lateral = self._parse_lateral() 3675 if lateral: 3676 return lateral 3677 3678 unnest = self._parse_unnest() 3679 if unnest: 3680 return unnest 3681 3682 values = self._parse_derived_table_values() 3683 if values: 3684 return values 3685 3686 subquery = self._parse_select(table=True) 3687 if subquery: 3688 if not subquery.args.get("pivots"): 3689 subquery.set("pivots", self._parse_pivots()) 3690 return subquery 3691 3692 bracket = parse_bracket and self._parse_bracket(None) 3693 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3694 3695 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3696 self._parse_table 3697 ) 3698 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3699 3700 only = self._match(TokenType.ONLY) 3701 3702 this = 
t.cast( 3703 exp.Expression, 3704 bracket 3705 or rows_from 3706 or self._parse_bracket( 3707 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3708 ), 3709 ) 3710 3711 if only: 3712 this.set("only", only) 3713 3714 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3715 self._match_text_seq("*") 3716 3717 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3718 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3719 this.set("partition", self._parse_partition()) 3720 3721 if schema: 3722 return self._parse_schema(this=this) 3723 3724 version = self._parse_version() 3725 3726 if version: 3727 this.set("version", version) 3728 3729 if self.dialect.ALIAS_POST_TABLESAMPLE: 3730 this.set("sample", self._parse_table_sample()) 3731 3732 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3733 if alias: 3734 this.set("alias", alias) 3735 3736 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3737 return self.expression( 3738 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3739 ) 3740 3741 this.set("hints", self._parse_table_hints()) 3742 3743 if not this.args.get("pivots"): 3744 this.set("pivots", self._parse_pivots()) 3745 3746 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3747 this.set("sample", self._parse_table_sample()) 3748 3749 if joins: 3750 for join in self._parse_joins(): 3751 this.append("joins", join) 3752 3753 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3754 this.set("ordinality", True) 3755 this.set("alias", self._parse_table_alias()) 3756 3757 return this 3758 3759 def _parse_version(self) -> t.Optional[exp.Version]: 3760 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3761 this = "TIMESTAMP" 3762 elif self._match(TokenType.VERSION_SNAPSHOT): 3763 this = "VERSION" 3764 else: 3765 return None 3766 3767 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3768 kind = self._prev.text.upper() 3769 start = self._parse_bitwise() 3770 self._match_texts(("TO", "AND")) 3771 end = self._parse_bitwise() 3772 expression: t.Optional[exp.Expression] = self.expression( 3773 exp.Tuple, expressions=[start, end] 3774 ) 3775 elif self._match_text_seq("CONTAINED", "IN"): 3776 kind = "CONTAINED IN" 3777 expression = self.expression( 3778 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3779 ) 3780 elif self._match(TokenType.ALL): 3781 kind = "ALL" 3782 expression = None 3783 else: 3784 self._match_text_seq("AS", "OF") 3785 kind = "AS OF" 3786 expression = self._parse_type() 3787 3788 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3789 3790 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3791 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3792 index = self._index 3793 historical_data = None 3794 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3795 this = self._prev.text.upper() 3796 kind = ( 3797 self._match(TokenType.L_PAREN) 3798 and self._match_texts(self.HISTORICAL_DATA_KIND) 3799 and self._prev.text.upper() 3800 ) 3801 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3802 3803 if expression: 3804 self._match_r_paren() 3805 historical_data = self.expression( 3806 exp.HistoricalData, this=this, kind=kind, expression=expression 3807 ) 3808 else: 3809 self._retreat(index) 3810 3811 return historical_data 3812 3813 def _parse_changes(self) -> t.Optional[exp.Changes]: 3814 if not 
self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3815 return None 3816 3817 information = self._parse_var(any_token=True) 3818 self._match_r_paren() 3819 3820 return self.expression( 3821 exp.Changes, 3822 information=information, 3823 at_before=self._parse_historical_data(), 3824 end=self._parse_historical_data(), 3825 ) 3826 3827 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3828 if not self._match(TokenType.UNNEST): 3829 return None 3830 3831 expressions = self._parse_wrapped_csv(self._parse_equality) 3832 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3833 3834 alias = self._parse_table_alias() if with_alias else None 3835 3836 if alias: 3837 if self.dialect.UNNEST_COLUMN_ONLY: 3838 if alias.args.get("columns"): 3839 self.raise_error("Unexpected extra column alias in unnest.") 3840 3841 alias.set("columns", [alias.this]) 3842 alias.set("this", None) 3843 3844 columns = alias.args.get("columns") or [] 3845 if offset and len(expressions) < len(columns): 3846 offset = columns.pop() 3847 3848 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3849 self._match(TokenType.ALIAS) 3850 offset = self._parse_id_var( 3851 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3852 ) or exp.to_identifier("offset") 3853 3854 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3855 3856 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3857 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3858 if not is_derived and not ( 3859 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3860 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3861 ): 3862 return None 3863 3864 expressions = self._parse_csv(self._parse_value) 3865 alias = self._parse_table_alias() 3866 3867 if is_derived: 3868 self._match_r_paren() 3869 3870 return self.expression( 3871 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3872 ) 3873 3874 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3875 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3876 as_modifier and self._match_text_seq("USING", "SAMPLE") 3877 ): 3878 return None 3879 3880 bucket_numerator = None 3881 bucket_denominator = None 3882 bucket_field = None 3883 percent = None 3884 size = None 3885 seed = None 3886 3887 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3888 matched_l_paren = self._match(TokenType.L_PAREN) 3889 3890 if self.TABLESAMPLE_CSV: 3891 num = None 3892 expressions = self._parse_csv(self._parse_primary) 3893 else: 3894 expressions = None 3895 num = ( 3896 self._parse_factor() 3897 if self._match(TokenType.NUMBER, advance=False) 3898 else self._parse_primary() or self._parse_placeholder() 3899 ) 3900 3901 if self._match_text_seq("BUCKET"): 3902 bucket_numerator = self._parse_number() 3903 self._match_text_seq("OUT", "OF") 3904 bucket_denominator = bucket_denominator = self._parse_number() 3905 self._match(TokenType.ON) 3906 bucket_field = self._parse_field() 3907 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3908 percent = num 3909 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3910 size = num 3911 else: 3912 percent = num 3913 3914 if matched_l_paren: 3915 self._match_r_paren() 3916 3917 if self._match(TokenType.L_PAREN): 3918 method = self._parse_var(upper=True) 3919 seed = self._match(TokenType.COMMA) and self._parse_number() 3920 
self._match_r_paren() 3921 elif self._match_texts(("SEED", "REPEATABLE")): 3922 seed = self._parse_wrapped(self._parse_number) 3923 3924 if not method and self.DEFAULT_SAMPLING_METHOD: 3925 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3926 3927 return self.expression( 3928 exp.TableSample, 3929 expressions=expressions, 3930 method=method, 3931 bucket_numerator=bucket_numerator, 3932 bucket_denominator=bucket_denominator, 3933 bucket_field=bucket_field, 3934 percent=percent, 3935 size=size, 3936 seed=seed, 3937 ) 3938 3939 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3940 return list(iter(self._parse_pivot, None)) or None 3941 3942 def _parse_joins(self) -> t.Iterator[exp.Join]: 3943 return iter(self._parse_join, None) 3944 3945 # https://duckdb.org/docs/sql/statements/pivot 3946 def _parse_simplified_pivot(self) -> exp.Pivot: 3947 def _parse_on() -> t.Optional[exp.Expression]: 3948 this = self._parse_bitwise() 3949 return self._parse_in(this) if self._match(TokenType.IN) else this 3950 3951 this = self._parse_table() 3952 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3953 using = self._match(TokenType.USING) and self._parse_csv( 3954 lambda: self._parse_alias(self._parse_function()) 3955 ) 3956 group = self._parse_group() 3957 return self.expression( 3958 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3959 ) 3960 3961 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3962 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3963 this = self._parse_select_or_expression() 3964 3965 self._match(TokenType.ALIAS) 3966 alias = self._parse_bitwise() 3967 if alias: 3968 if isinstance(alias, exp.Column) and not alias.db: 3969 alias = alias.this 3970 return self.expression(exp.PivotAlias, this=this, alias=alias) 3971 3972 return this 3973 3974 value = self._parse_column() 3975 3976 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3977 self.raise_error("Expecting IN (") 3978 3979 if self._match(TokenType.ANY): 3980 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3981 else: 3982 exprs = self._parse_csv(_parse_aliased_expression) 3983 3984 self._match_r_paren() 3985 return self.expression(exp.In, this=value, expressions=exprs) 3986 3987 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3988 index = self._index 3989 include_nulls = None 3990 3991 if self._match(TokenType.PIVOT): 3992 unpivot = False 3993 elif self._match(TokenType.UNPIVOT): 3994 unpivot = True 3995 3996 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3997 if self._match_text_seq("INCLUDE", "NULLS"): 3998 include_nulls = True 3999 elif self._match_text_seq("EXCLUDE", "NULLS"): 4000 include_nulls = False 4001 else: 4002 return None 4003 4004 expressions = [] 4005 4006 if not self._match(TokenType.L_PAREN): 4007 self._retreat(index) 4008 return None 4009 4010 if unpivot: 4011 expressions = self._parse_csv(self._parse_column) 4012 else: 4013 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4014 4015 if not expressions: 4016 self.raise_error("Failed to parse PIVOT's aggregation list") 4017 4018 if not self._match(TokenType.FOR): 4019 self.raise_error("Expecting FOR") 4020 4021 field = self._parse_pivot_in() 4022 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4023 self._parse_bitwise 4024 ) 4025 4026 self._match_r_paren() 4027 4028 pivot = self.expression( 4029 exp.Pivot, 4030 expressions=expressions, 
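            # Illustrative sketch (an assumption, not part of this module): on
            # Snowflake, "SELECT * FROM t PIVOT(SUM(v) FOR k IN ('a', 'b'))"
            # should reach this point with `expressions` holding the SUM(v)
            # aggregation and `field` holding the exp.In built by
            # _parse_pivot_in above.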
4031 field=field, 4032 unpivot=unpivot, 4033 include_nulls=include_nulls, 4034 default_on_null=default_on_null, 4035 ) 4036 4037 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4038 pivot.set("alias", self._parse_table_alias()) 4039 4040 if not unpivot: 4041 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4042 4043 columns: t.List[exp.Expression] = [] 4044 for fld in pivot.args["field"].expressions: 4045 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4046 for name in names: 4047 if self.PREFIXED_PIVOT_COLUMNS: 4048 name = f"{name}_{field_name}" if name else field_name 4049 else: 4050 name = f"{field_name}_{name}" if name else field_name 4051 4052 columns.append(exp.to_identifier(name)) 4053 4054 pivot.set("columns", columns) 4055 4056 return pivot 4057 4058 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4059 return [agg.alias for agg in aggregations] 4060 4061 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4062 if not skip_where_token and not self._match(TokenType.PREWHERE): 4063 return None 4064 4065 return self.expression( 4066 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4067 ) 4068 4069 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4070 if not skip_where_token and not self._match(TokenType.WHERE): 4071 return None 4072 4073 return self.expression( 4074 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4075 ) 4076 4077 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4078 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4079 return None 4080 4081 elements: t.Dict[str, t.Any] = defaultdict(list) 4082 4083 if self._match(TokenType.ALL): 4084 elements["all"] = True 4085 elif self._match(TokenType.DISTINCT): 4086 elements["all"] = False 4087 4088 while True: 4089 index = self._index 4090 4091 elements["expressions"].extend( 4092 self._parse_csv( 4093 lambda: None 4094 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4095 else self._parse_assignment() 4096 ) 4097 ) 4098 4099 before_with_index = self._index 4100 with_prefix = self._match(TokenType.WITH) 4101 4102 if self._match(TokenType.ROLLUP): 4103 elements["rollup"].append( 4104 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4105 ) 4106 elif self._match(TokenType.CUBE): 4107 elements["cube"].append( 4108 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4109 ) 4110 elif self._match(TokenType.GROUPING_SETS): 4111 elements["grouping_sets"].append( 4112 self.expression( 4113 exp.GroupingSets, 4114 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4115 ) 4116 ) 4117 elif self._match_text_seq("TOTALS"): 4118 elements["totals"] = True # type: ignore 4119 4120 if before_with_index <= self._index <= before_with_index + 1: 4121 self._retreat(before_with_index) 4122 break 4123 4124 if index == self._index: 4125 break 4126 4127 return self.expression(exp.Group, **elements) # type: ignore 4128 4129 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4130 return self.expression( 4131 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4132 ) 4133 4134 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4135 if self._match(TokenType.L_PAREN): 4136 grouping_set = self._parse_csv(self._parse_column) 4137 
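            # Illustrative reading (hedged): for GROUP BY GROUPING SETS ((a, b), c),
            # the parenthesized (a, b) takes this branch and becomes an exp.Tuple,
            # while the bare c falls through to _parse_column() below.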
self._match_r_paren() 4138 return self.expression(exp.Tuple, expressions=grouping_set) 4139 4140 return self._parse_column() 4141 4142 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4143 if not skip_having_token and not self._match(TokenType.HAVING): 4144 return None 4145 return self.expression(exp.Having, this=self._parse_assignment()) 4146 4147 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4148 if not self._match(TokenType.QUALIFY): 4149 return None 4150 return self.expression(exp.Qualify, this=self._parse_assignment()) 4151 4152 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4153 if skip_start_token: 4154 start = None 4155 elif self._match(TokenType.START_WITH): 4156 start = self._parse_assignment() 4157 else: 4158 return None 4159 4160 self._match(TokenType.CONNECT_BY) 4161 nocycle = self._match_text_seq("NOCYCLE") 4162 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4163 exp.Prior, this=self._parse_bitwise() 4164 ) 4165 connect = self._parse_assignment() 4166 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4167 4168 if not start and self._match(TokenType.START_WITH): 4169 start = self._parse_assignment() 4170 4171 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4172 4173 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4174 this = self._parse_id_var(any_token=True) 4175 if self._match(TokenType.ALIAS): 4176 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4177 return this 4178 4179 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4180 if self._match_text_seq("INTERPOLATE"): 4181 return self._parse_wrapped_csv(self._parse_name_as_expression) 4182 return None 4183 4184 def _parse_order( 4185 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4186 ) -> t.Optional[exp.Expression]: 4187 siblings = None 4188 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4189 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4190 return this 4191 4192 siblings = True 4193 4194 return self.expression( 4195 exp.Order, 4196 this=this, 4197 expressions=self._parse_csv(self._parse_ordered), 4198 siblings=siblings, 4199 ) 4200 4201 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4202 if not self._match(token): 4203 return None 4204 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4205 4206 def _parse_ordered( 4207 self, parse_method: t.Optional[t.Callable] = None 4208 ) -> t.Optional[exp.Ordered]: 4209 this = parse_method() if parse_method else self._parse_assignment() 4210 if not this: 4211 return None 4212 4213 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4214 this = exp.var("ALL") 4215 4216 asc = self._match(TokenType.ASC) 4217 desc = self._match(TokenType.DESC) or (asc and False) 4218 4219 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4220 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4221 4222 nulls_first = is_nulls_first or False 4223 explicitly_null_ordered = is_nulls_first or is_nulls_last 4224 4225 if ( 4226 not explicitly_null_ordered 4227 and ( 4228 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4229 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4230 ) 4231 and self.dialect.NULL_ORDERING != "nulls_are_last" 4232 ): 4233 nulls_first = True 4234 4235 if self._match_text_seq("WITH", "FILL"): 4236 with_fill = self.expression( 
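                # ClickHouse's ORDER BY ... WITH FILL; illustrative sketch (hedged):
                #   sqlglot.parse_one(
                #       "SELECT x FROM t ORDER BY x WITH FILL FROM 1 TO 10 STEP 1",
                #       read="clickhouse",
                #   )
                # should populate the "from"/"to"/"step" args parsed below.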
4237 exp.WithFill, 4238 **{ # type: ignore 4239 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4240 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4241 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4242 "interpolate": self._parse_interpolate(), 4243 }, 4244 ) 4245 else: 4246 with_fill = None 4247 4248 return self.expression( 4249 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4250 ) 4251 4252 def _parse_limit( 4253 self, 4254 this: t.Optional[exp.Expression] = None, 4255 top: bool = False, 4256 skip_limit_token: bool = False, 4257 ) -> t.Optional[exp.Expression]: 4258 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4259 comments = self._prev_comments 4260 if top: 4261 limit_paren = self._match(TokenType.L_PAREN) 4262 expression = self._parse_term() if limit_paren else self._parse_number() 4263 4264 if limit_paren: 4265 self._match_r_paren() 4266 else: 4267 expression = self._parse_term() 4268 4269 if self._match(TokenType.COMMA): 4270 offset = expression 4271 expression = self._parse_term() 4272 else: 4273 offset = None 4274 4275 limit_exp = self.expression( 4276 exp.Limit, 4277 this=this, 4278 expression=expression, 4279 offset=offset, 4280 comments=comments, 4281 expressions=self._parse_limit_by(), 4282 ) 4283 4284 return limit_exp 4285 4286 if self._match(TokenType.FETCH): 4287 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4288 direction = self._prev.text.upper() if direction else "FIRST" 4289 4290 count = self._parse_field(tokens=self.FETCH_TOKENS) 4291 percent = self._match(TokenType.PERCENT) 4292 4293 self._match_set((TokenType.ROW, TokenType.ROWS)) 4294 4295 only = self._match_text_seq("ONLY") 4296 with_ties = self._match_text_seq("WITH", "TIES") 4297 4298 if only and with_ties: 4299 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4300 4301 return self.expression( 4302 exp.Fetch, 4303 direction=direction, 4304 count=count, 4305 percent=percent, 4306 with_ties=with_ties, 4307 ) 4308 4309 return this 4310 4311 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4312 if not self._match(TokenType.OFFSET): 4313 return this 4314 4315 count = self._parse_term() 4316 self._match_set((TokenType.ROW, TokenType.ROWS)) 4317 4318 return self.expression( 4319 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4320 ) 4321 4322 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4323 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4324 4325 def _parse_locks(self) -> t.List[exp.Lock]: 4326 locks = [] 4327 while True: 4328 if self._match_text_seq("FOR", "UPDATE"): 4329 update = True 4330 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4331 "LOCK", "IN", "SHARE", "MODE" 4332 ): 4333 update = False 4334 else: 4335 break 4336 4337 expressions = None 4338 if self._match_text_seq("OF"): 4339 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4340 4341 wait: t.Optional[bool | exp.Expression] = None 4342 if self._match_text_seq("NOWAIT"): 4343 wait = True 4344 elif self._match_text_seq("WAIT"): 4345 wait = self._parse_primary() 4346 elif self._match_text_seq("SKIP", "LOCKED"): 4347 wait = False 4348 4349 locks.append( 4350 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4351 ) 4352 4353 return locks 4354 4355 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> 
t.Optional[exp.Expression]: 4356 while this and self._match_set(self.SET_OPERATIONS): 4357 token_type = self._prev.token_type 4358 4359 if token_type == TokenType.UNION: 4360 operation: t.Type[exp.SetOperation] = exp.Union 4361 elif token_type == TokenType.EXCEPT: 4362 operation = exp.Except 4363 else: 4364 operation = exp.Intersect 4365 4366 comments = self._prev.comments 4367 4368 if self._match(TokenType.DISTINCT): 4369 distinct: t.Optional[bool] = True 4370 elif self._match(TokenType.ALL): 4371 distinct = False 4372 else: 4373 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4374 if distinct is None: 4375 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4376 4377 by_name = self._match_text_seq("BY", "NAME") 4378 expression = self._parse_select(nested=True, parse_set_operation=False) 4379 4380 this = self.expression( 4381 operation, 4382 comments=comments, 4383 this=this, 4384 distinct=distinct, 4385 by_name=by_name, 4386 expression=expression, 4387 ) 4388 4389 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4390 expression = this.expression 4391 4392 if expression: 4393 for arg in self.SET_OP_MODIFIERS: 4394 expr = expression.args.get(arg) 4395 if expr: 4396 this.set(arg, expr.pop()) 4397 4398 return this 4399 4400 def _parse_expression(self) -> t.Optional[exp.Expression]: 4401 return self._parse_alias(self._parse_assignment()) 4402 4403 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4404 this = self._parse_disjunction() 4405 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4406 # This allows us to parse <non-identifier token> := <expr> 4407 this = exp.column( 4408 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4409 ) 4410 4411 while self._match_set(self.ASSIGNMENT): 4412 if isinstance(this, exp.Column) and len(this.parts) == 1: 4413 this = this.this 4414 4415 this = self.expression( 4416 self.ASSIGNMENT[self._prev.token_type], 4417 this=this, 4418 comments=self._prev_comments, 4419 expression=self._parse_assignment(), 4420 ) 4421 4422 return this 4423 4424 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4425 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4426 4427 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4428 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4429 4430 def _parse_equality(self) -> t.Optional[exp.Expression]: 4431 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4432 4433 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4434 return self._parse_tokens(self._parse_range, self.COMPARISON) 4435 4436 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4437 this = this or self._parse_bitwise() 4438 negate = self._match(TokenType.NOT) 4439 4440 if self._match_set(self.RANGE_PARSERS): 4441 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4442 if not expression: 4443 return this 4444 4445 this = expression 4446 elif self._match(TokenType.ISNULL): 4447 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4448 4449 # Postgres supports ISNULL and NOTNULL for conditions. 
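        # Illustrative sketch (an assumption about the public API): on Postgres,
        #   sqlglot.parse_one("SELECT * FROM t WHERE x NOTNULL", read="postgres")
        # should take the NOTNULL branch below, yielding NOT (x IS NULL).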
4450 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4451 if self._match(TokenType.NOTNULL): 4452 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4453 this = self.expression(exp.Not, this=this) 4454 4455 if negate: 4456 this = self._negate_range(this) 4457 4458 if self._match(TokenType.IS): 4459 this = self._parse_is(this) 4460 4461 return this 4462 4463 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4464 if not this: 4465 return this 4466 4467 return self.expression(exp.Not, this=this) 4468 4469 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4470 index = self._index - 1 4471 negate = self._match(TokenType.NOT) 4472 4473 if self._match_text_seq("DISTINCT", "FROM"): 4474 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4475 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4476 4477 if self._match(TokenType.JSON): 4478 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4479 4480 if self._match_text_seq("WITH"): 4481 _with = True 4482 elif self._match_text_seq("WITHOUT"): 4483 _with = False 4484 else: 4485 _with = None 4486 4487 unique = self._match(TokenType.UNIQUE) 4488 self._match_text_seq("KEYS") 4489 expression: t.Optional[exp.Expression] = self.expression( 4490 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4491 ) 4492 else: 4493 expression = self._parse_primary() or self._parse_null() 4494 if not expression: 4495 self._retreat(index) 4496 return None 4497 4498 this = self.expression(exp.Is, this=this, expression=expression) 4499 return self.expression(exp.Not, this=this) if negate else this 4500 4501 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4502 unnest = self._parse_unnest(with_alias=False) 4503 if unnest: 4504 this = self.expression(exp.In, this=this, unnest=unnest) 4505 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4506 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4507 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4508 4509 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4510 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4511 else: 4512 this = self.expression(exp.In, this=this, expressions=expressions) 4513 4514 if matched_l_paren: 4515 self._match_r_paren(this) 4516 elif not self._match(TokenType.R_BRACKET, expression=this): 4517 self.raise_error("Expecting ]") 4518 else: 4519 this = self.expression(exp.In, this=this, field=self._parse_column()) 4520 4521 return this 4522 4523 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4524 low = self._parse_bitwise() 4525 self._match(TokenType.AND) 4526 high = self._parse_bitwise() 4527 return self.expression(exp.Between, this=this, low=low, high=high) 4528 4529 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4530 if not self._match(TokenType.ESCAPE): 4531 return this 4532 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4533 4534 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4535 index = self._index 4536 4537 if not self._match(TokenType.INTERVAL) and match_interval: 4538 return None 4539 4540 if self._match(TokenType.STRING, advance=False): 4541 this = self._parse_primary() 4542 else: 4543 this = self._parse_term() 4544 4545 if not 
this or ( 4546 isinstance(this, exp.Column) 4547 and not this.table 4548 and not this.this.quoted 4549 and this.name.upper() == "IS" 4550 ): 4551 self._retreat(index) 4552 return None 4553 4554 unit = self._parse_function() or ( 4555 not self._match(TokenType.ALIAS, advance=False) 4556 and self._parse_var(any_token=True, upper=True) 4557 ) 4558 4559 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4560 # each INTERVAL expression into this canonical form so it's easy to transpile 4561 if this and this.is_number: 4562 this = exp.Literal.string(this.to_py()) 4563 elif this and this.is_string: 4564 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4565 if len(parts) == 1: 4566 if unit: 4567 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4568 self._retreat(self._index - 1) 4569 4570 this = exp.Literal.string(parts[0][0]) 4571 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4572 4573 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4574 unit = self.expression( 4575 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4576 ) 4577 4578 interval = self.expression(exp.Interval, this=this, unit=unit) 4579 4580 index = self._index 4581 self._match(TokenType.PLUS) 4582 4583 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4584 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4585 return self.expression( 4586 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4587 ) 4588 4589 self._retreat(index) 4590 return interval 4591 4592 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4593 this = self._parse_term() 4594 4595 while True: 4596 if self._match_set(self.BITWISE): 4597 this = self.expression( 4598 self.BITWISE[self._prev.token_type], 4599 this=this, 4600 expression=self._parse_term(), 4601 ) 4602 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4603 this = self.expression( 4604 exp.DPipe, 4605 this=this, 4606 expression=self._parse_term(), 4607 safe=not self.dialect.STRICT_STRING_CONCAT, 4608 ) 4609 elif self._match(TokenType.DQMARK): 4610 this = self.expression( 4611 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4612 ) 4613 elif self._match_pair(TokenType.LT, TokenType.LT): 4614 this = self.expression( 4615 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4616 ) 4617 elif self._match_pair(TokenType.GT, TokenType.GT): 4618 this = self.expression( 4619 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4620 ) 4621 else: 4622 break 4623 4624 return this 4625 4626 def _parse_term(self) -> t.Optional[exp.Expression]: 4627 this = self._parse_factor() 4628 4629 while self._match_set(self.TERM): 4630 klass = self.TERM[self._prev.token_type] 4631 comments = self._prev_comments 4632 expression = self._parse_factor() 4633 4634 this = self.expression(klass, this=this, comments=comments, expression=expression) 4635 4636 if isinstance(this, exp.Collate): 4637 expr = this.expression 4638 4639 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4640 # fallback to Identifier / Var 4641 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4642 ident = expr.this 4643 if isinstance(ident, exp.Identifier): 4644 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4645 4646 return this 4647 4648 def _parse_factor(self) -> t.Optional[exp.Expression]: 4649 parse_method = self._parse_exponent if 
self.EXPONENT else self._parse_unary 4650 this = parse_method() 4651 4652 while self._match_set(self.FACTOR): 4653 klass = self.FACTOR[self._prev.token_type] 4654 comments = self._prev_comments 4655 expression = parse_method() 4656 4657 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4658 self._retreat(self._index - 1) 4659 return this 4660 4661 this = self.expression(klass, this=this, comments=comments, expression=expression) 4662 4663 if isinstance(this, exp.Div): 4664 this.args["typed"] = self.dialect.TYPED_DIVISION 4665 this.args["safe"] = self.dialect.SAFE_DIVISION 4666 4667 return this 4668 4669 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4670 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4671 4672 def _parse_unary(self) -> t.Optional[exp.Expression]: 4673 if self._match_set(self.UNARY_PARSERS): 4674 return self.UNARY_PARSERS[self._prev.token_type](self) 4675 return self._parse_at_time_zone(self._parse_type()) 4676 4677 def _parse_type( 4678 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4679 ) -> t.Optional[exp.Expression]: 4680 interval = parse_interval and self._parse_interval() 4681 if interval: 4682 return interval 4683 4684 index = self._index 4685 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4686 4687 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4688 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4689 if isinstance(data_type, exp.Cast): 4690 # This constructor can contain ops directly after it, for instance struct unnesting: 4691 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4692 return self._parse_column_ops(data_type) 4693 4694 if data_type: 4695 index2 = self._index 4696 this = self._parse_primary() 4697 4698 if isinstance(this, exp.Literal): 4699 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4700 if parser: 4701 return parser(self, this, data_type) 4702 4703 return self.expression(exp.Cast, this=this, to=data_type) 4704 4705 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4706 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4707 # 4708 # If the index difference here is greater than 1, that means the parser itself must have 4709 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4710 # 4711 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4712 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4713 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4714 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4715 # 4716 # In these cases, we don't really want to return the converted type, but instead retreat 4717 # and try to parse a Column or Identifier in the section below. 
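        # A worked example of the two cases (illustrative, hedged):
        #   - "CAST(x AS DECIMAL(38, 0))": the parser consumes DECIMAL ( 38 , 0 ),
        #     so index2 - index > 1 and the parenthesized type is kept as-is.
        #   - A bare "DECIMAL" on a dialect whose TYPE_CONVERTERS fills in the
        #     default precision/scale consumes a single token, so we retreat and
        #     re-parse the name as a Column or Identifier below.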
4718 if data_type.expressions and index2 - index > 1: 4719 self._retreat(index2) 4720 return self._parse_column_ops(data_type) 4721 4722 self._retreat(index) 4723 4724 if fallback_to_identifier: 4725 return self._parse_id_var() 4726 4727 this = self._parse_column() 4728 return this and self._parse_column_ops(this) 4729 4730 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4731 this = self._parse_type() 4732 if not this: 4733 return None 4734 4735 if isinstance(this, exp.Column) and not this.table: 4736 this = exp.var(this.name.upper()) 4737 4738 return self.expression( 4739 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4740 ) 4741 4742 def _parse_types( 4743 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4744 ) -> t.Optional[exp.Expression]: 4745 index = self._index 4746 4747 this: t.Optional[exp.Expression] = None 4748 prefix = self._match_text_seq("SYSUDTLIB", ".") 4749 4750 if not self._match_set(self.TYPE_TOKENS): 4751 identifier = allow_identifiers and self._parse_id_var( 4752 any_token=False, tokens=(TokenType.VAR,) 4753 ) 4754 if isinstance(identifier, exp.Identifier): 4755 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4756 4757 if len(tokens) != 1: 4758 self.raise_error("Unexpected identifier", self._prev) 4759 4760 if tokens[0].token_type in self.TYPE_TOKENS: 4761 self._prev = tokens[0] 4762 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4763 type_name = identifier.name 4764 4765 while self._match(TokenType.DOT): 4766 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4767 4768 this = exp.DataType.build(type_name, udt=True) 4769 else: 4770 self._retreat(self._index - 1) 4771 return None 4772 else: 4773 return None 4774 4775 type_token = self._prev.token_type 4776 4777 if type_token == TokenType.PSEUDO_TYPE: 4778 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4779 4780 if type_token == TokenType.OBJECT_IDENTIFIER: 4781 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4782 4783 # https://materialize.com/docs/sql/types/map/ 4784 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4785 key_type = self._parse_types( 4786 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4787 ) 4788 if not self._match(TokenType.FARROW): 4789 self._retreat(index) 4790 return None 4791 4792 value_type = self._parse_types( 4793 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4794 ) 4795 if not self._match(TokenType.R_BRACKET): 4796 self._retreat(index) 4797 return None 4798 4799 return exp.DataType( 4800 this=exp.DataType.Type.MAP, 4801 expressions=[key_type, value_type], 4802 nested=True, 4803 prefix=prefix, 4804 ) 4805 4806 nested = type_token in self.NESTED_TYPE_TOKENS 4807 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4808 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4809 expressions = None 4810 maybe_func = False 4811 4812 if self._match(TokenType.L_PAREN): 4813 if is_struct: 4814 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4815 elif nested: 4816 expressions = self._parse_csv( 4817 lambda: self._parse_types( 4818 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4819 ) 4820 ) 4821 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4822 this = expressions[0] 4823 this.set("nullable", True) 4824 self._match_r_paren() 4825 return this 4826 elif type_token in self.ENUM_TYPE_TOKENS: 4827 
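                # Illustrative note: e.g. ClickHouse-style enums such as
                # ENUM('a' = 1, 'b' = 2); each member is parsed with
                # _parse_equality so an optional "= value" part is preserved.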
expressions = self._parse_csv(self._parse_equality) 4828 elif is_aggregate: 4829 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4830 any_token=False, tokens=(TokenType.VAR,) 4831 ) 4832 if not func_or_ident or not self._match(TokenType.COMMA): 4833 return None 4834 expressions = self._parse_csv( 4835 lambda: self._parse_types( 4836 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4837 ) 4838 ) 4839 expressions.insert(0, func_or_ident) 4840 else: 4841 expressions = self._parse_csv(self._parse_type_size) 4842 4843 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4844 if type_token == TokenType.VECTOR and len(expressions) == 2: 4845 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4846 4847 if not expressions or not self._match(TokenType.R_PAREN): 4848 self._retreat(index) 4849 return None 4850 4851 maybe_func = True 4852 4853 values: t.Optional[t.List[exp.Expression]] = None 4854 4855 if nested and self._match(TokenType.LT): 4856 if is_struct: 4857 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4858 else: 4859 expressions = self._parse_csv( 4860 lambda: self._parse_types( 4861 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4862 ) 4863 ) 4864 4865 if not self._match(TokenType.GT): 4866 self.raise_error("Expecting >") 4867 4868 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4869 values = self._parse_csv(self._parse_assignment) 4870 if not values and is_struct: 4871 values = None 4872 self._retreat(self._index - 1) 4873 else: 4874 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4875 4876 if type_token in self.TIMESTAMPS: 4877 if self._match_text_seq("WITH", "TIME", "ZONE"): 4878 maybe_func = False 4879 tz_type = ( 4880 exp.DataType.Type.TIMETZ 4881 if type_token in self.TIMES 4882 else exp.DataType.Type.TIMESTAMPTZ 4883 ) 4884 this = exp.DataType(this=tz_type, expressions=expressions) 4885 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4886 maybe_func = False 4887 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4888 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4889 maybe_func = False 4890 elif type_token == TokenType.INTERVAL: 4891 unit = self._parse_var(upper=True) 4892 if unit: 4893 if self._match_text_seq("TO"): 4894 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4895 4896 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4897 else: 4898 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4899 4900 if maybe_func and check_func: 4901 index2 = self._index 4902 peek = self._parse_string() 4903 4904 if not peek: 4905 self._retreat(index) 4906 return None 4907 4908 self._retreat(index2) 4909 4910 if not this: 4911 if self._match_text_seq("UNSIGNED"): 4912 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4913 if not unsigned_type_token: 4914 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4915 4916 type_token = unsigned_type_token or type_token 4917 4918 this = exp.DataType( 4919 this=exp.DataType.Type[type_token.value], 4920 expressions=expressions, 4921 nested=nested, 4922 prefix=prefix, 4923 ) 4924 4925 # Empty arrays/structs are allowed 4926 if values is not None: 4927 cls = exp.Struct if is_struct else exp.Array 4928 this = exp.cast(cls(expressions=values), this, copy=False) 4929 4930 elif expressions: 4931 this.set("expressions", 
expressions) 4932 4933 # https://materialize.com/docs/sql/types/list/#type-name 4934 while self._match(TokenType.LIST): 4935 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4936 4937 index = self._index 4938 4939 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4940 matched_array = self._match(TokenType.ARRAY) 4941 4942 while self._curr: 4943 datatype_token = self._prev.token_type 4944 matched_l_bracket = self._match(TokenType.L_BRACKET) 4945 if not matched_l_bracket and not matched_array: 4946 break 4947 4948 matched_array = False 4949 values = self._parse_csv(self._parse_assignment) or None 4950 if ( 4951 values 4952 and not schema 4953 and ( 4954 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4955 ) 4956 ): 4957 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4958 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4959 self._retreat(index) 4960 break 4961 4962 this = exp.DataType( 4963 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4964 ) 4965 self._match(TokenType.R_BRACKET) 4966 4967 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4968 converter = self.TYPE_CONVERTERS.get(this.this) 4969 if converter: 4970 this = converter(t.cast(exp.DataType, this)) 4971 4972 return this 4973 4974 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4975 index = self._index 4976 4977 if ( 4978 self._curr 4979 and self._next 4980 and self._curr.token_type in self.TYPE_TOKENS 4981 and self._next.token_type in self.TYPE_TOKENS 4982 ): 4983 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4984 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4985 this = self._parse_id_var() 4986 else: 4987 this = ( 4988 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4989 or self._parse_id_var() 4990 ) 4991 4992 self._match(TokenType.COLON) 4993 4994 if ( 4995 type_required 4996 and not isinstance(this, exp.DataType) 4997 and not self._match_set(self.TYPE_TOKENS, advance=False) 4998 ): 4999 self._retreat(index) 5000 return self._parse_types() 5001 5002 return self._parse_column_def(this) 5003 5004 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5005 if not self._match_text_seq("AT", "TIME", "ZONE"): 5006 return this 5007 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5008 5009 def _parse_column(self) -> t.Optional[exp.Expression]: 5010 this = self._parse_column_reference() 5011 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5012 5013 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5014 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5015 5016 return column 5017 5018 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5019 this = self._parse_field() 5020 if ( 5021 not this 5022 and self._match(TokenType.VALUES, advance=False) 5023 and self.VALUES_FOLLOWED_BY_PAREN 5024 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5025 ): 5026 this = self._parse_id_var() 5027 5028 if isinstance(this, exp.Identifier): 5029 # We bubble up comments from the Identifier to the Column 5030 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5031 5032 return this 5033 5034 def _parse_colon_as_variant_extract( 5035 self, this: t.Optional[exp.Expression] 5036 ) -> t.Optional[exp.Expression]: 5037 casts = [] 5038 json_path = [] 5039 escape = None 5040 5041 while self._match(TokenType.COLON): 5042 start_index = self._index 5043 5044 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5045 path = self._parse_column_ops( 5046 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5047 ) 5048 5049 # The cast :: operator has a lower precedence than the extraction operator :, so 5050 # we rearrange the AST appropriately to avoid casting the JSON path 5051 while isinstance(path, exp.Cast): 5052 casts.append(path.to) 5053 path = path.this 5054 5055 if casts: 5056 dcolon_offset = next( 5057 i 5058 for i, t in enumerate(self._tokens[start_index:]) 5059 if t.token_type == TokenType.DCOLON 5060 ) 5061 end_token = self._tokens[start_index + dcolon_offset - 1] 5062 else: 5063 end_token = self._prev 5064 5065 if path: 5066 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5067 # it'll roundtrip to a string literal in GET_PATH 5068 if isinstance(path, exp.Identifier) and path.quoted: 5069 escape = True 5070 5071 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5072 5073 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5074 # Databricks transforms it back to the colon/dot notation 5075 if json_path: 5076 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5077 5078 if json_path_expr: 5079 json_path_expr.set("escape", escape) 5080 5081 this = self.expression( 5082 exp.JSONExtract, 5083 this=this, 5084 expression=json_path_expr, 5085 variant_extract=True, 5086 ) 5087 5088 while casts: 5089 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5090 5091 return this 5092 5093 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5094 return self._parse_types() 5095 5096 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5097 this = self._parse_bracket(this) 5098 5099 while self._match_set(self.COLUMN_OPERATORS): 5100 op_token = self._prev.token_type 5101 op = self.COLUMN_OPERATORS.get(op_token) 5102 5103 if op_token == TokenType.DCOLON: 5104 field = self._parse_dcolon() 5105 if not field: 5106 self.raise_error("Expected type") 5107 elif op and self._curr: 5108 field = self._parse_column_reference() or self._parse_bracket() 5109 else: 5110 field = self._parse_field(any_token=True, anonymous_func=True) 5111 5112 if isinstance(field, exp.Func) and this: 5113 # bigquery allows function calls like x.y.count(...) 5114 # SAFE.SUBSTR(...) 5115 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5116 this = exp.replace_tree( 5117 this, 5118 lambda n: ( 5119 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5120 if n.table 5121 else n.this 5122 ) 5123 if isinstance(n, exp.Column) 5124 else n, 5125 ) 5126 5127 if op: 5128 this = op(self, this, field) 5129 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5130 this = self.expression( 5131 exp.Column, 5132 comments=this.comments, 5133 this=field, 5134 table=this.this, 5135 db=this.args.get("table"), 5136 catalog=this.args.get("db"), 5137 ) 5138 else: 5139 this = self.expression(exp.Dot, this=this, expression=field) 5140 5141 if field and field.comments: 5142 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5143 5144 this = self._parse_bracket(this) 5145 5146 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5147 5148 def _parse_primary(self) -> t.Optional[exp.Expression]: 5149 if self._match_set(self.PRIMARY_PARSERS): 5150 token_type = self._prev.token_type 5151 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5152 5153 if token_type == TokenType.STRING: 5154 expressions = [primary] 5155 while self._match(TokenType.STRING): 5156 expressions.append(exp.Literal.string(self._prev.text)) 5157 5158 if len(expressions) > 1: 5159 return self.expression(exp.Concat, expressions=expressions) 5160 5161 return primary 5162 5163 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5164 return exp.Literal.number(f"0.{self._prev.text}") 5165 5166 if self._match(TokenType.L_PAREN): 5167 comments = self._prev_comments 5168 query = self._parse_select() 5169 5170 if query: 5171 expressions = [query] 5172 else: 5173 expressions = self._parse_expressions() 5174 5175 this = 
self._parse_query_modifiers(seq_get(expressions, 0)) 5176 5177 if not this and self._match(TokenType.R_PAREN, advance=False): 5178 this = self.expression(exp.Tuple) 5179 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5180 this = self._parse_subquery(this=this, parse_alias=False) 5181 elif isinstance(this, exp.Subquery): 5182 this = self._parse_subquery( 5183 this=self._parse_set_operations(this), parse_alias=False 5184 ) 5185 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5186 this = self.expression(exp.Tuple, expressions=expressions) 5187 else: 5188 this = self.expression(exp.Paren, this=this) 5189 5190 if this: 5191 this.add_comments(comments) 5192 5193 self._match_r_paren(expression=this) 5194 return this 5195 5196 return None 5197 5198 def _parse_field( 5199 self, 5200 any_token: bool = False, 5201 tokens: t.Optional[t.Collection[TokenType]] = None, 5202 anonymous_func: bool = False, 5203 ) -> t.Optional[exp.Expression]: 5204 if anonymous_func: 5205 field = ( 5206 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5207 or self._parse_primary() 5208 ) 5209 else: 5210 field = self._parse_primary() or self._parse_function( 5211 anonymous=anonymous_func, any_token=any_token 5212 ) 5213 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5214 5215 def _parse_function( 5216 self, 5217 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5218 anonymous: bool = False, 5219 optional_parens: bool = True, 5220 any_token: bool = False, 5221 ) -> t.Optional[exp.Expression]: 5222 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5223 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5224 fn_syntax = False 5225 if ( 5226 self._match(TokenType.L_BRACE, advance=False) 5227 and self._next 5228 and self._next.text.upper() == "FN" 5229 ): 5230 self._advance(2) 5231 fn_syntax = True 5232 5233 func = self._parse_function_call( 5234 functions=functions, 5235 anonymous=anonymous, 5236 optional_parens=optional_parens, 5237 any_token=any_token, 5238 ) 5239 5240 if fn_syntax: 5241 self._match(TokenType.R_BRACE) 5242 5243 return func 5244 5245 def _parse_function_call( 5246 self, 5247 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5248 anonymous: bool = False, 5249 optional_parens: bool = True, 5250 any_token: bool = False, 5251 ) -> t.Optional[exp.Expression]: 5252 if not self._curr: 5253 return None 5254 5255 comments = self._curr.comments 5256 token_type = self._curr.token_type 5257 this = self._curr.text 5258 upper = this.upper() 5259 5260 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5261 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5262 self._advance() 5263 return self._parse_window(parser(self)) 5264 5265 if not self._next or self._next.token_type != TokenType.L_PAREN: 5266 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5267 self._advance() 5268 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5269 5270 return None 5271 5272 if any_token: 5273 if token_type in self.RESERVED_TOKENS: 5274 return None 5275 elif token_type not in self.FUNC_TOKENS: 5276 return None 5277 5278 self._advance(2) 5279 5280 parser = self.FUNCTION_PARSERS.get(upper) 5281 if parser and not anonymous: 5282 this = parser(self) 5283 else: 5284 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5285 5286 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5287 this = self.expression( 5288 
subquery_predicate, comments=comments, this=self._parse_select() 5289 ) 5290 self._match_r_paren() 5291 return this 5292 5293 if functions is None: 5294 functions = self.FUNCTIONS 5295 5296 function = functions.get(upper) 5297 5298 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5299 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5300 5301 if alias: 5302 args = self._kv_to_prop_eq(args) 5303 5304 if function and not anonymous: 5305 if "dialect" in function.__code__.co_varnames: 5306 func = function(args, dialect=self.dialect) 5307 else: 5308 func = function(args) 5309 5310 func = self.validate_expression(func, args) 5311 if not self.dialect.NORMALIZE_FUNCTIONS: 5312 func.meta["name"] = this 5313 5314 this = func 5315 else: 5316 if token_type == TokenType.IDENTIFIER: 5317 this = exp.Identifier(this=this, quoted=True) 5318 this = self.expression(exp.Anonymous, this=this, expressions=args) 5319 5320 if isinstance(this, exp.Expression): 5321 this.add_comments(comments) 5322 5323 self._match_r_paren(this) 5324 return self._parse_window(this) 5325 5326 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5327 return expression 5328 5329 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5330 transformed = [] 5331 5332 for index, e in enumerate(expressions): 5333 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5334 if isinstance(e, exp.Alias): 5335 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5336 5337 if not isinstance(e, exp.PropertyEQ): 5338 e = self.expression( 5339 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5340 ) 5341 5342 if isinstance(e.this, exp.Column): 5343 e.this.replace(e.this.this) 5344 else: 5345 e = self._to_prop_eq(e, index) 5346 5347 transformed.append(e) 5348 5349 return transformed 5350 5351 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5352 return self._parse_statement() 5353 5354 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5355 return self._parse_column_def(self._parse_id_var()) 5356 5357 def _parse_user_defined_function( 5358 self, kind: t.Optional[TokenType] = None 5359 ) -> t.Optional[exp.Expression]: 5360 this = self._parse_id_var() 5361 5362 while self._match(TokenType.DOT): 5363 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5364 5365 if not self._match(TokenType.L_PAREN): 5366 return this 5367 5368 expressions = self._parse_csv(self._parse_function_parameter) 5369 self._match_r_paren() 5370 return self.expression( 5371 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5372 ) 5373 5374 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5375 literal = self._parse_primary() 5376 if literal: 5377 return self.expression(exp.Introducer, this=token.text, expression=literal) 5378 5379 return self.expression(exp.Identifier, this=token.text) 5380 5381 def _parse_session_parameter(self) -> exp.SessionParameter: 5382 kind = None 5383 this = self._parse_id_var() or self._parse_primary() 5384 5385 if this and self._match(TokenType.DOT): 5386 kind = this.name 5387 this = self._parse_var() or self._parse_primary() 5388 5389 return self.expression(exp.SessionParameter, this=this, kind=kind) 5390 5391 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5392 return self._parse_id_var() 5393 5394 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5395 index = self._index 
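# Illustrative sketch, not part of the original source: the base parser's LAMBDAS
# table maps TokenType.ARROW to exp.Lambda, so an argument such as `x -> x + 1`
# is matched below once the (optionally parenthesized) argument list has been
# collected. Assuming the public parse_one API:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT TRANSFORM(xs, x -> x + 1)").find(sqlglot.exp.Lambda).sql()
#   'x -> x + 1'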
5396 5397 if self._match(TokenType.L_PAREN): 5398 expressions = t.cast( 5399 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5400 ) 5401 5402 if not self._match(TokenType.R_PAREN): 5403 self._retreat(index) 5404 else: 5405 expressions = [self._parse_lambda_arg()] 5406 5407 if self._match_set(self.LAMBDAS): 5408 return self.LAMBDAS[self._prev.token_type](self, expressions) 5409 5410 self._retreat(index) 5411 5412 this: t.Optional[exp.Expression] 5413 5414 if self._match(TokenType.DISTINCT): 5415 this = self.expression( 5416 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5417 ) 5418 else: 5419 this = self._parse_select_or_expression(alias=alias) 5420 5421 return self._parse_limit( 5422 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5423 ) 5424 5425 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5426 index = self._index 5427 if not self._match(TokenType.L_PAREN): 5428 return this 5429 5430 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5431 # expr can be of both types 5432 if self._match_set(self.SELECT_START_TOKENS): 5433 self._retreat(index) 5434 return this 5435 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5436 self._match_r_paren() 5437 return self.expression(exp.Schema, this=this, expressions=args) 5438 5439 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5440 return self._parse_column_def(self._parse_field(any_token=True)) 5441 5442 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5443 # column defs are not really columns, they're identifiers 5444 if isinstance(this, exp.Column): 5445 this = this.this 5446 5447 kind = self._parse_types(schema=True) 5448 5449 if self._match_text_seq("FOR", "ORDINALITY"): 5450 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5451 5452 constraints: t.List[exp.Expression] = [] 5453 5454 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5455 ("ALIAS", "MATERIALIZED") 5456 ): 5457 persisted = self._prev.text.upper() == "MATERIALIZED" 5458 constraint_kind = exp.ComputedColumnConstraint( 5459 this=self._parse_assignment(), 5460 persisted=persisted or self._match_text_seq("PERSISTED"), 5461 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5462 ) 5463 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5464 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5465 self._match(TokenType.ALIAS) 5466 constraints.append( 5467 self.expression( 5468 exp.ColumnConstraint, 5469 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5470 ) 5471 ) 5472 5473 while True: 5474 constraint = self._parse_column_constraint() 5475 if not constraint: 5476 break 5477 constraints.append(constraint) 5478 5479 if not kind and not constraints: 5480 return this 5481 5482 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5483 5484 def _parse_auto_increment( 5485 self, 5486 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5487 start = None 5488 increment = None 5489 5490 if self._match(TokenType.L_PAREN, advance=False): 5491 args = self._parse_wrapped_csv(self._parse_bitwise) 5492 start = seq_get(args, 0) 5493 increment = seq_get(args, 1) 5494 elif self._match_text_seq("START"): 5495 start = self._parse_bitwise() 5496 self._match_text_seq("INCREMENT") 5497 
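# The textual form, e.g. Snowflake's `AUTOINCREMENT START 1 INCREMENT 1`:
# START's value was parsed above, the optional INCREMENT keyword was just
# consumed, and its value is read next so that both land on the
# GeneratedAsIdentityColumnConstraint built below.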
increment = self._parse_bitwise() 5498 5499 if start and increment: 5500 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5501 5502 return exp.AutoIncrementColumnConstraint() 5503 5504 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5505 if not self._match_text_seq("REFRESH"): 5506 self._retreat(self._index - 1) 5507 return None 5508 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5509 5510 def _parse_compress(self) -> exp.CompressColumnConstraint: 5511 if self._match(TokenType.L_PAREN, advance=False): 5512 return self.expression( 5513 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5514 ) 5515 5516 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5517 5518 def _parse_generated_as_identity( 5519 self, 5520 ) -> ( 5521 exp.GeneratedAsIdentityColumnConstraint 5522 | exp.ComputedColumnConstraint 5523 | exp.GeneratedAsRowColumnConstraint 5524 ): 5525 if self._match_text_seq("BY", "DEFAULT"): 5526 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5527 this = self.expression( 5528 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5529 ) 5530 else: 5531 self._match_text_seq("ALWAYS") 5532 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5533 5534 self._match(TokenType.ALIAS) 5535 5536 if self._match_text_seq("ROW"): 5537 start = self._match_text_seq("START") 5538 if not start: 5539 self._match(TokenType.END) 5540 hidden = self._match_text_seq("HIDDEN") 5541 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5542 5543 identity = self._match_text_seq("IDENTITY") 5544 5545 if self._match(TokenType.L_PAREN): 5546 if self._match(TokenType.START_WITH): 5547 this.set("start", self._parse_bitwise()) 5548 if self._match_text_seq("INCREMENT", "BY"): 5549 this.set("increment", self._parse_bitwise()) 5550 if self._match_text_seq("MINVALUE"): 5551 this.set("minvalue", self._parse_bitwise()) 5552 if self._match_text_seq("MAXVALUE"): 5553 this.set("maxvalue", self._parse_bitwise()) 5554 5555 if self._match_text_seq("CYCLE"): 5556 this.set("cycle", True) 5557 elif self._match_text_seq("NO", "CYCLE"): 5558 this.set("cycle", False) 5559 5560 if not identity: 5561 this.set("expression", self._parse_range()) 5562 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5563 args = self._parse_csv(self._parse_bitwise) 5564 this.set("start", seq_get(args, 0)) 5565 this.set("increment", seq_get(args, 1)) 5566 5567 self._match_r_paren() 5568 5569 return this 5570 5571 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5572 self._match_text_seq("LENGTH") 5573 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5574 5575 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5576 if self._match_text_seq("NULL"): 5577 return self.expression(exp.NotNullColumnConstraint) 5578 if self._match_text_seq("CASESPECIFIC"): 5579 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5580 if self._match_text_seq("FOR", "REPLICATION"): 5581 return self.expression(exp.NotForReplicationColumnConstraint) 5582 5583 # Unconsume the `NOT` token 5584 self._retreat(self._index - 1) 5585 return None 5586 5587 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5588 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5589 5590 procedure_option_follows = ( 5591 self._match(TokenType.WITH, 
advance=False) 5592 and self._next 5593 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5594 ) 5595 5596 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5597 return self.expression( 5598 exp.ColumnConstraint, 5599 this=this, 5600 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5601 ) 5602 5603 return this 5604 5605 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5606 if not self._match(TokenType.CONSTRAINT): 5607 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5608 5609 return self.expression( 5610 exp.Constraint, 5611 this=self._parse_id_var(), 5612 expressions=self._parse_unnamed_constraints(), 5613 ) 5614 5615 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5616 constraints = [] 5617 while True: 5618 constraint = self._parse_unnamed_constraint() or self._parse_function() 5619 if not constraint: 5620 break 5621 constraints.append(constraint) 5622 5623 return constraints 5624 5625 def _parse_unnamed_constraint( 5626 self, constraints: t.Optional[t.Collection[str]] = None 5627 ) -> t.Optional[exp.Expression]: 5628 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5629 constraints or self.CONSTRAINT_PARSERS 5630 ): 5631 return None 5632 5633 constraint = self._prev.text.upper() 5634 if constraint not in self.CONSTRAINT_PARSERS: 5635 self.raise_error(f"No parser found for schema constraint {constraint}.") 5636 5637 return self.CONSTRAINT_PARSERS[constraint](self) 5638 5639 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5640 return self._parse_id_var(any_token=False) 5641 5642 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5643 self._match_text_seq("KEY") 5644 return self.expression( 5645 exp.UniqueColumnConstraint, 5646 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5647 this=self._parse_schema(self._parse_unique_key()), 5648 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5649 on_conflict=self._parse_on_conflict(), 5650 ) 5651 5652 def _parse_key_constraint_options(self) -> t.List[str]: 5653 options = [] 5654 while True: 5655 if not self._curr: 5656 break 5657 5658 if self._match(TokenType.ON): 5659 action = None 5660 on = self._advance_any() and self._prev.text 5661 5662 if self._match_text_seq("NO", "ACTION"): 5663 action = "NO ACTION" 5664 elif self._match_text_seq("CASCADE"): 5665 action = "CASCADE" 5666 elif self._match_text_seq("RESTRICT"): 5667 action = "RESTRICT" 5668 elif self._match_pair(TokenType.SET, TokenType.NULL): 5669 action = "SET NULL" 5670 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5671 action = "SET DEFAULT" 5672 else: 5673 self.raise_error("Invalid key constraint") 5674 5675 options.append(f"ON {on} {action}") 5676 else: 5677 var = self._parse_var_from_options( 5678 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5679 ) 5680 if not var: 5681 break 5682 options.append(var.name) 5683 5684 return options 5685 5686 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5687 if match and not self._match(TokenType.REFERENCES): 5688 return None 5689 5690 expressions = None 5691 this = self._parse_table(schema=True) 5692 options = self._parse_key_constraint_options() 5693 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5694 5695 def _parse_foreign_key(self) -> exp.ForeignKey: 5696 expressions = self._parse_wrapped_id_vars() 5697 reference = self._parse_references() 5698 options = {} 5699 5700 
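# Sketch of what the loop below collects (comment added for illustration): for
# `FOREIGN KEY (a) REFERENCES t (b) ON DELETE CASCADE ON UPDATE SET NULL` it
# yields options == {"delete": "CASCADE", "update": "SET NULL"}, which are then
# passed to exp.ForeignKey as keyword arguments.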
while self._match(TokenType.ON): 5701 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5702 self.raise_error("Expected DELETE or UPDATE") 5703 5704 kind = self._prev.text.lower() 5705 5706 if self._match_text_seq("NO", "ACTION"): 5707 action = "NO ACTION" 5708 elif self._match(TokenType.SET): 5709 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5710 action = "SET " + self._prev.text.upper() 5711 else: 5712 self._advance() 5713 action = self._prev.text.upper() 5714 5715 options[kind] = action 5716 5717 return self.expression( 5718 exp.ForeignKey, 5719 expressions=expressions, 5720 reference=reference, 5721 **options, # type: ignore 5722 ) 5723 5724 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5725 return self._parse_field() 5726 5727 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5728 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5729 self._retreat(self._index - 1) 5730 return None 5731 5732 id_vars = self._parse_wrapped_id_vars() 5733 return self.expression( 5734 exp.PeriodForSystemTimeConstraint, 5735 this=seq_get(id_vars, 0), 5736 expression=seq_get(id_vars, 1), 5737 ) 5738 5739 def _parse_primary_key( 5740 self, wrapped_optional: bool = False, in_props: bool = False 5741 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5742 desc = ( 5743 self._match_set((TokenType.ASC, TokenType.DESC)) 5744 and self._prev.token_type == TokenType.DESC 5745 ) 5746 5747 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5748 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5749 5750 expressions = self._parse_wrapped_csv( 5751 self._parse_primary_key_part, optional=wrapped_optional 5752 ) 5753 options = self._parse_key_constraint_options() 5754 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5755 5756 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5757 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5758 5759 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5760 """ 5761 Parses a datetime column in ODBC format. We parse the column into the corresponding 5762 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5763 same as we did for `DATE('yyyy-mm-dd')`. 
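ODBC also defines `{t'hh:mm:ss'}` and `{ts'yyyy-mm-dd hh:mm:ss'}` literals; the
prefix (`d`, `t` or `ts`) is looked up in ODBC_DATETIME_LITERALS below to pick
the expression type.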
5764 5765 Reference: 5766 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5767 """ 5768 self._match(TokenType.VAR) 5769 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5770 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5771 if not self._match(TokenType.R_BRACE): 5772 self.raise_error("Expected }") 5773 return expression 5774 5775 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5776 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5777 return this 5778 5779 bracket_kind = self._prev.token_type 5780 if ( 5781 bracket_kind == TokenType.L_BRACE 5782 and self._curr 5783 and self._curr.token_type == TokenType.VAR 5784 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5785 ): 5786 return self._parse_odbc_datetime_literal() 5787 5788 expressions = self._parse_csv( 5789 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5790 ) 5791 5792 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5793 self.raise_error("Expected ]") 5794 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5795 self.raise_error("Expected }") 5796 5797 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5798 if bracket_kind == TokenType.L_BRACE: 5799 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5800 elif not this: 5801 this = build_array_constructor( 5802 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5803 ) 5804 else: 5805 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5806 if constructor_type: 5807 return build_array_constructor( 5808 constructor_type, 5809 args=expressions, 5810 bracket_kind=bracket_kind, 5811 dialect=self.dialect, 5812 ) 5813 5814 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5815 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5816 5817 self._add_comments(this) 5818 return self._parse_bracket(this) 5819 5820 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5821 if self._match(TokenType.COLON): 5822 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5823 return this 5824 5825 def _parse_case(self) -> t.Optional[exp.Expression]: 5826 ifs = [] 5827 default = None 5828 5829 comments = self._prev_comments 5830 expression = self._parse_assignment() 5831 5832 while self._match(TokenType.WHEN): 5833 this = self._parse_assignment() 5834 self._match(TokenType.THEN) 5835 then = self._parse_assignment() 5836 ifs.append(self.expression(exp.If, this=this, true=then)) 5837 5838 if self._match(TokenType.ELSE): 5839 default = self._parse_assignment() 5840 5841 if not self._match(TokenType.END): 5842 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5843 default = exp.column("interval") 5844 else: 5845 self.raise_error("Expected END after CASE", self._prev) 5846 5847 return self.expression( 5848 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5849 ) 5850 5851 def _parse_if(self) -> t.Optional[exp.Expression]: 5852 if self._match(TokenType.L_PAREN): 5853 args = self._parse_csv(self._parse_assignment) 5854 this = self.validate_expression(exp.If.from_arg_list(args), args) 5855 self._match_r_paren() 5856 else: 5857 index = self._index - 1 5858 5859 if self.NO_PAREN_IF_COMMANDS and index == 0: 5860 
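# A bare IF at the very start of a statement (index == 0) is procedural in
# dialects that enable NO_PAREN_IF_COMMANDS, so the remainder is captured
# verbatim as a Command instead of being parsed as an expression.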
return self._parse_as_command(self._prev) 5861 5862 condition = self._parse_assignment() 5863 5864 if not condition: 5865 self._retreat(index) 5866 return None 5867 5868 self._match(TokenType.THEN) 5869 true = self._parse_assignment() 5870 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5871 self._match(TokenType.END) 5872 this = self.expression(exp.If, this=condition, true=true, false=false) 5873 5874 return this 5875 5876 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5877 if not self._match_text_seq("VALUE", "FOR"): 5878 self._retreat(self._index - 1) 5879 return None 5880 5881 return self.expression( 5882 exp.NextValueFor, 5883 this=self._parse_column(), 5884 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5885 ) 5886 5887 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5888 this = self._parse_function() or self._parse_var_or_string(upper=True) 5889 5890 if self._match(TokenType.FROM): 5891 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5892 5893 if not self._match(TokenType.COMMA): 5894 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5895 5896 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5897 5898 def _parse_gap_fill(self) -> exp.GapFill: 5899 self._match(TokenType.TABLE) 5900 this = self._parse_table() 5901 5902 self._match(TokenType.COMMA) 5903 args = [this, *self._parse_csv(self._parse_lambda)] 5904 5905 gap_fill = exp.GapFill.from_arg_list(args) 5906 return self.validate_expression(gap_fill, args) 5907 5908 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5909 this = self._parse_assignment() 5910 5911 if not self._match(TokenType.ALIAS): 5912 if self._match(TokenType.COMMA): 5913 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5914 5915 self.raise_error("Expected AS after CAST") 5916 5917 fmt = None 5918 to = self._parse_types() 5919 5920 if self._match(TokenType.FORMAT): 5921 fmt_string = self._parse_string() 5922 fmt = self._parse_at_time_zone(fmt_string) 5923 5924 if not to: 5925 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5926 if to.this in exp.DataType.TEMPORAL_TYPES: 5927 this = self.expression( 5928 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5929 this=this, 5930 format=exp.Literal.string( 5931 format_time( 5932 fmt_string.this if fmt_string else "", 5933 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5934 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5935 ) 5936 ), 5937 safe=safe, 5938 ) 5939 5940 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5941 this.set("zone", fmt.args["zone"]) 5942 return this 5943 elif not to: 5944 self.raise_error("Expected TYPE after CAST") 5945 elif isinstance(to, exp.Identifier): 5946 to = exp.DataType.build(to.name, udt=True) 5947 elif to.this == exp.DataType.Type.CHAR: 5948 if self._match(TokenType.CHARACTER_SET): 5949 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5950 5951 return self.expression( 5952 exp.Cast if strict else exp.TryCast, 5953 this=this, 5954 to=to, 5955 format=fmt, 5956 safe=safe, 5957 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5958 ) 5959 5960 def _parse_string_agg(self) -> exp.GroupConcat: 5961 if self._match(TokenType.DISTINCT): 5962 args: t.List[t.Optional[exp.Expression]] = [ 5963 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
5964 ] 5965 if self._match(TokenType.COMMA): 5966 args.extend(self._parse_csv(self._parse_assignment)) 5967 else: 5968 args = self._parse_csv(self._parse_assignment) # type: ignore 5969 5970 if self._match_text_seq("ON", "OVERFLOW"): 5971 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 5972 if self._match_text_seq("ERROR"): 5973 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 5974 else: 5975 self._match_text_seq("TRUNCATE") 5976 on_overflow = self.expression( 5977 exp.OverflowTruncateBehavior, 5978 this=self._parse_string(), 5979 with_count=( 5980 self._match_text_seq("WITH", "COUNT") 5981 or not self._match_text_seq("WITHOUT", "COUNT") 5982 ), 5983 ) 5984 else: 5985 on_overflow = None 5986 5987 index = self._index 5988 if not self._match(TokenType.R_PAREN) and args: 5989 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5990 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5991 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5992 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5993 5994 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5995 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5996 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5997 if not self._match_text_seq("WITHIN", "GROUP"): 5998 self._retreat(index) 5999 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6000 6001 # The corresponding match_r_paren will be called in parse_function (caller) 6002 self._match_l_paren() 6003 6004 return self.expression( 6005 exp.GroupConcat, 6006 this=self._parse_order(this=seq_get(args, 0)), 6007 separator=seq_get(args, 1), 6008 on_overflow=on_overflow, 6009 ) 6010 6011 def _parse_convert( 6012 self, strict: bool, safe: t.Optional[bool] = None 6013 ) -> t.Optional[exp.Expression]: 6014 this = self._parse_bitwise() 6015 6016 if self._match(TokenType.USING): 6017 to: t.Optional[exp.Expression] = self.expression( 6018 exp.CharacterSet, this=self._parse_var() 6019 ) 6020 elif self._match(TokenType.COMMA): 6021 to = self._parse_types() 6022 else: 6023 to = None 6024 6025 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6026 6027 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6028 """ 6029 There are generally two variants of the DECODE function: 6030 6031 - DECODE(bin, charset) 6032 - DECODE(expression, search, result [, search, result] ... [, default]) 6033 6034 The second variant will always be parsed into a CASE expression. Note that NULL 6035 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6036 instead of relying on pattern matching. 
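For example, `DECODE(x, 1, 'one', NULL, 'missing', 'other')` becomes roughly
`CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'missing' ELSE 'other' END`.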
6037 """ 6038 args = self._parse_csv(self._parse_assignment) 6039 6040 if len(args) < 3: 6041 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6042 6043 expression, *expressions = args 6044 if not expression: 6045 return None 6046 6047 ifs = [] 6048 for search, result in zip(expressions[::2], expressions[1::2]): 6049 if not search or not result: 6050 return None 6051 6052 if isinstance(search, exp.Literal): 6053 ifs.append( 6054 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6055 ) 6056 elif isinstance(search, exp.Null): 6057 ifs.append( 6058 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6059 ) 6060 else: 6061 cond = exp.or_( 6062 exp.EQ(this=expression.copy(), expression=search), 6063 exp.and_( 6064 exp.Is(this=expression.copy(), expression=exp.Null()), 6065 exp.Is(this=search.copy(), expression=exp.Null()), 6066 copy=False, 6067 ), 6068 copy=False, 6069 ) 6070 ifs.append(exp.If(this=cond, true=result)) 6071 6072 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6073 6074 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6075 self._match_text_seq("KEY") 6076 key = self._parse_column() 6077 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6078 self._match_text_seq("VALUE") 6079 value = self._parse_bitwise() 6080 6081 if not key and not value: 6082 return None 6083 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6084 6085 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6086 if not this or not self._match_text_seq("FORMAT", "JSON"): 6087 return this 6088 6089 return self.expression(exp.FormatJson, this=this) 6090 6091 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6092 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6093 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6094 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6095 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6096 else: 6097 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6098 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6099 6100 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6101 6102 if not empty and not error and not null: 6103 return None 6104 6105 return self.expression( 6106 exp.OnCondition, 6107 empty=empty, 6108 error=error, 6109 null=null, 6110 ) 6111 6112 def _parse_on_handling( 6113 self, on: str, *values: str 6114 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6115 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6116 for value in values: 6117 if self._match_text_seq(value, "ON", on): 6118 return f"{value} ON {on}" 6119 6120 index = self._index 6121 if self._match(TokenType.DEFAULT): 6122 default_value = self._parse_bitwise() 6123 if self._match_text_seq("ON", on): 6124 return default_value 6125 6126 self._retreat(index) 6127 6128 return None 6129 6130 @t.overload 6131 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6132 6133 @t.overload 6134 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
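# Illustrative sketch (assumes the public parse_one API; not part of the
# original source): both overloads funnel into the implementation below, e.g.
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT JSON_OBJECT(KEY 'a' VALUE 1 ABSENT ON NULL)")
#
# parses the key/value pairs plus the ON NULL handling into exp.JSONObject,
# while the aggregate variant produces exp.JSONObjectAgg.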
6135 6136 def _parse_json_object(self, agg=False): 6137 star = self._parse_star() 6138 expressions = ( 6139 [star] 6140 if star 6141 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6142 ) 6143 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6144 6145 unique_keys = None 6146 if self._match_text_seq("WITH", "UNIQUE"): 6147 unique_keys = True 6148 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6149 unique_keys = False 6150 6151 self._match_text_seq("KEYS") 6152 6153 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6154 self._parse_type() 6155 ) 6156 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6157 6158 return self.expression( 6159 exp.JSONObjectAgg if agg else exp.JSONObject, 6160 expressions=expressions, 6161 null_handling=null_handling, 6162 unique_keys=unique_keys, 6163 return_type=return_type, 6164 encoding=encoding, 6165 ) 6166 6167 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6168 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6169 if not self._match_text_seq("NESTED"): 6170 this = self._parse_id_var() 6171 kind = self._parse_types(allow_identifiers=False) 6172 nested = None 6173 else: 6174 this = None 6175 kind = None 6176 nested = True 6177 6178 path = self._match_text_seq("PATH") and self._parse_string() 6179 nested_schema = nested and self._parse_json_schema() 6180 6181 return self.expression( 6182 exp.JSONColumnDef, 6183 this=this, 6184 kind=kind, 6185 path=path, 6186 nested_schema=nested_schema, 6187 ) 6188 6189 def _parse_json_schema(self) -> exp.JSONSchema: 6190 self._match_text_seq("COLUMNS") 6191 return self.expression( 6192 exp.JSONSchema, 6193 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6194 ) 6195 6196 def _parse_json_table(self) -> exp.JSONTable: 6197 this = self._parse_format_json(self._parse_bitwise()) 6198 path = self._match(TokenType.COMMA) and self._parse_string() 6199 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6200 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6201 schema = self._parse_json_schema() 6202 6203 return exp.JSONTable( 6204 this=this, 6205 schema=schema, 6206 path=path, 6207 error_handling=error_handling, 6208 empty_handling=empty_handling, 6209 ) 6210 6211 def _parse_match_against(self) -> exp.MatchAgainst: 6212 expressions = self._parse_csv(self._parse_column) 6213 6214 self._match_text_seq(")", "AGAINST", "(") 6215 6216 this = self._parse_string() 6217 6218 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6219 modifier = "IN NATURAL LANGUAGE MODE" 6220 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6221 modifier = f"{modifier} WITH QUERY EXPANSION" 6222 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6223 modifier = "IN BOOLEAN MODE" 6224 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6225 modifier = "WITH QUERY EXPANSION" 6226 else: 6227 modifier = None 6228 6229 return self.expression( 6230 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6231 ) 6232 6233 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6234 def _parse_open_json(self) -> exp.OpenJSON: 6235 this = self._parse_bitwise() 6236 path = self._match(TokenType.COMMA) and self._parse_string() 6237 6238 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6239 this = self._parse_field(any_token=True) 6240 kind = self._parse_types() 6241 path = 
self._parse_string() 6242 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6243 6244 return self.expression( 6245 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6246 ) 6247 6248 expressions = None 6249 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6250 self._match_l_paren() 6251 expressions = self._parse_csv(_parse_open_json_column_def) 6252 6253 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6254 6255 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6256 args = self._parse_csv(self._parse_bitwise) 6257 6258 if self._match(TokenType.IN): 6259 return self.expression( 6260 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6261 ) 6262 6263 if haystack_first: 6264 haystack = seq_get(args, 0) 6265 needle = seq_get(args, 1) 6266 else: 6267 needle = seq_get(args, 0) 6268 haystack = seq_get(args, 1) 6269 6270 return self.expression( 6271 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6272 ) 6273 6274 def _parse_predict(self) -> exp.Predict: 6275 self._match_text_seq("MODEL") 6276 this = self._parse_table() 6277 6278 self._match(TokenType.COMMA) 6279 self._match_text_seq("TABLE") 6280 6281 return self.expression( 6282 exp.Predict, 6283 this=this, 6284 expression=self._parse_table(), 6285 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6286 ) 6287 6288 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6289 args = self._parse_csv(self._parse_table) 6290 return exp.JoinHint(this=func_name.upper(), expressions=args) 6291 6292 def _parse_substring(self) -> exp.Substring: 6293 # Postgres supports the form: substring(string [from int] [for int]) 6294 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6295 6296 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6297 6298 if self._match(TokenType.FROM): 6299 args.append(self._parse_bitwise()) 6300 if self._match(TokenType.FOR): 6301 if len(args) == 1: 6302 args.append(exp.Literal.number(1)) 6303 args.append(self._parse_bitwise()) 6304 6305 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6306 6307 def _parse_trim(self) -> exp.Trim: 6308 # https://www.w3resource.com/sql/character-functions/trim.php 6309 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6310 6311 position = None 6312 collation = None 6313 expression = None 6314 6315 if self._match_texts(self.TRIM_TYPES): 6316 position = self._prev.text.upper() 6317 6318 this = self._parse_bitwise() 6319 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6320 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6321 expression = self._parse_bitwise() 6322 6323 if invert_order: 6324 this, expression = expression, this 6325 6326 if self._match(TokenType.COLLATE): 6327 collation = self._parse_bitwise() 6328 6329 return self.expression( 6330 exp.Trim, this=this, position=position, expression=expression, collation=collation 6331 ) 6332 6333 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6334 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6335 6336 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6337 return self._parse_window(self._parse_id_var(), alias=True) 6338 6339 def _parse_respect_or_ignore_nulls( 6340 self, this: t.Optional[exp.Expression] 6341 ) -> t.Optional[exp.Expression]: 6342 if self._match_text_seq("IGNORE", "NULLS"): 
6343 return self.expression(exp.IgnoreNulls, this=this) 6344 if self._match_text_seq("RESPECT", "NULLS"): 6345 return self.expression(exp.RespectNulls, this=this) 6346 return this 6347 6348 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6349 if self._match(TokenType.HAVING): 6350 self._match_texts(("MAX", "MIN")) 6351 max = self._prev.text.upper() != "MIN" 6352 return self.expression( 6353 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6354 ) 6355 6356 return this 6357 6358 def _parse_window( 6359 self, this: t.Optional[exp.Expression], alias: bool = False 6360 ) -> t.Optional[exp.Expression]: 6361 func = this 6362 comments = func.comments if isinstance(func, exp.Expression) else None 6363 6364 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6365 self._match(TokenType.WHERE) 6366 this = self.expression( 6367 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6368 ) 6369 self._match_r_paren() 6370 6371 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6372 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6373 if self._match_text_seq("WITHIN", "GROUP"): 6374 order = self._parse_wrapped(self._parse_order) 6375 this = self.expression(exp.WithinGroup, this=this, expression=order) 6376 6377 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6378 # Some dialects choose to implement and some do not. 6379 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6380 6381 # There is some code above in _parse_lambda that handles 6382 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6383 6384 # The below changes handle 6385 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6386 6387 # Oracle allows both formats 6388 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6389 # and Snowflake chose to do the same for familiarity 6390 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6391 if isinstance(this, exp.AggFunc): 6392 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6393 6394 if ignore_respect and ignore_respect is not this: 6395 ignore_respect.replace(ignore_respect.this) 6396 this = self.expression(ignore_respect.__class__, this=this) 6397 6398 this = self._parse_respect_or_ignore_nulls(this) 6399 6400 # bigquery select from window x AS (partition by ...) 
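# e.g. `SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)`: for the
# WINDOW clause, _parse_named_window passes the window name as `this` with
# alias=True, so AS is consumed here and no OVER keyword is expected.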
6401 if alias: 6402 over = None 6403 self._match(TokenType.ALIAS) 6404 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6405 return this 6406 else: 6407 over = self._prev.text.upper() 6408 6409 if comments and isinstance(func, exp.Expression): 6410 func.pop_comments() 6411 6412 if not self._match(TokenType.L_PAREN): 6413 return self.expression( 6414 exp.Window, 6415 comments=comments, 6416 this=this, 6417 alias=self._parse_id_var(False), 6418 over=over, 6419 ) 6420 6421 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6422 6423 first = self._match(TokenType.FIRST) 6424 if self._match_text_seq("LAST"): 6425 first = False 6426 6427 partition, order = self._parse_partition_and_order() 6428 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6429 6430 if kind: 6431 self._match(TokenType.BETWEEN) 6432 start = self._parse_window_spec() 6433 self._match(TokenType.AND) 6434 end = self._parse_window_spec() 6435 6436 spec = self.expression( 6437 exp.WindowSpec, 6438 kind=kind, 6439 start=start["value"], 6440 start_side=start["side"], 6441 end=end["value"], 6442 end_side=end["side"], 6443 ) 6444 else: 6445 spec = None 6446 6447 self._match_r_paren() 6448 6449 window = self.expression( 6450 exp.Window, 6451 comments=comments, 6452 this=this, 6453 partition_by=partition, 6454 order=order, 6455 spec=spec, 6456 alias=window_alias, 6457 over=over, 6458 first=first, 6459 ) 6460 6461 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6462 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6463 return self._parse_window(window, alias=alias) 6464 6465 return window 6466 6467 def _parse_partition_and_order( 6468 self, 6469 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6470 return self._parse_partition_by(), self._parse_order() 6471 6472 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6473 self._match(TokenType.BETWEEN) 6474 6475 return { 6476 "value": ( 6477 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6478 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6479 or self._parse_bitwise() 6480 ), 6481 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6482 } 6483 6484 def _parse_alias( 6485 self, this: t.Optional[exp.Expression], explicit: bool = False 6486 ) -> t.Optional[exp.Expression]: 6487 any_token = self._match(TokenType.ALIAS) 6488 comments = self._prev_comments or [] 6489 6490 if explicit and not any_token: 6491 return this 6492 6493 if self._match(TokenType.L_PAREN): 6494 aliases = self.expression( 6495 exp.Aliases, 6496 comments=comments, 6497 this=this, 6498 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6499 ) 6500 self._match_r_paren(aliases) 6501 return aliases 6502 6503 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6504 self.STRING_ALIASES and self._parse_string_as_identifier() 6505 ) 6506 6507 if alias: 6508 comments.extend(alias.pop_comments()) 6509 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6510 column = this.this 6511 6512 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6513 if not this.comments and column and column.comments: 6514 this.comments = column.pop_comments() 6515 6516 return this 6517 6518 def _parse_id_var( 6519 self, 6520 any_token: bool = True, 6521 tokens: t.Optional[t.Collection[TokenType]] = None, 6522 ) -> t.Optional[exp.Expression]: 6523 expression = self._parse_identifier() 6524 if 
not expression and ( 6525 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6526 ): 6527 quoted = self._prev.token_type == TokenType.STRING 6528 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6529 6530 return expression 6531 6532 def _parse_string(self) -> t.Optional[exp.Expression]: 6533 if self._match_set(self.STRING_PARSERS): 6534 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6535 return self._parse_placeholder() 6536 6537 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6538 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6539 6540 def _parse_number(self) -> t.Optional[exp.Expression]: 6541 if self._match_set(self.NUMERIC_PARSERS): 6542 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6543 return self._parse_placeholder() 6544 6545 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6546 if self._match(TokenType.IDENTIFIER): 6547 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6548 return self._parse_placeholder() 6549 6550 def _parse_var( 6551 self, 6552 any_token: bool = False, 6553 tokens: t.Optional[t.Collection[TokenType]] = None, 6554 upper: bool = False, 6555 ) -> t.Optional[exp.Expression]: 6556 if ( 6557 (any_token and self._advance_any()) 6558 or self._match(TokenType.VAR) 6559 or (self._match_set(tokens) if tokens else False) 6560 ): 6561 return self.expression( 6562 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6563 ) 6564 return self._parse_placeholder() 6565 6566 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6567 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6568 self._advance() 6569 return self._prev 6570 return None 6571 6572 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6573 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6574 6575 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6576 return self._parse_primary() or self._parse_var(any_token=True) 6577 6578 def _parse_null(self) -> t.Optional[exp.Expression]: 6579 if self._match_set(self.NULL_TOKENS): 6580 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6581 return self._parse_placeholder() 6582 6583 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6584 if self._match(TokenType.TRUE): 6585 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6586 if self._match(TokenType.FALSE): 6587 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6588 return self._parse_placeholder() 6589 6590 def _parse_star(self) -> t.Optional[exp.Expression]: 6591 if self._match(TokenType.STAR): 6592 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6593 return self._parse_placeholder() 6594 6595 def _parse_parameter(self) -> exp.Parameter: 6596 this = self._parse_identifier() or self._parse_primary_or_var() 6597 return self.expression(exp.Parameter, this=this) 6598 6599 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6600 if self._match_set(self.PLACEHOLDER_PARSERS): 6601 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6602 if placeholder: 6603 return placeholder 6604 self._advance(-1) 6605 return None 6606 6607 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6608 if not self._match_texts(keywords): 6609 return None 6610 if self._match(TokenType.L_PAREN, 
advance=False): 6611 return self._parse_wrapped_csv(self._parse_expression) 6612 6613 expression = self._parse_expression() 6614 return [expression] if expression else None 6615 6616 def _parse_csv( 6617 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6618 ) -> t.List[exp.Expression]: 6619 parse_result = parse_method() 6620 items = [parse_result] if parse_result is not None else [] 6621 6622 while self._match(sep): 6623 self._add_comments(parse_result) 6624 parse_result = parse_method() 6625 if parse_result is not None: 6626 items.append(parse_result) 6627 6628 return items 6629 6630 def _parse_tokens( 6631 self, parse_method: t.Callable, expressions: t.Dict 6632 ) -> t.Optional[exp.Expression]: 6633 this = parse_method() 6634 6635 while self._match_set(expressions): 6636 this = self.expression( 6637 expressions[self._prev.token_type], 6638 this=this, 6639 comments=self._prev_comments, 6640 expression=parse_method(), 6641 ) 6642 6643 return this 6644 6645 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6646 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6647 6648 def _parse_wrapped_csv( 6649 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6650 ) -> t.List[exp.Expression]: 6651 return self._parse_wrapped( 6652 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6653 ) 6654 6655 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6656 wrapped = self._match(TokenType.L_PAREN) 6657 if not wrapped and not optional: 6658 self.raise_error("Expecting (") 6659 parse_result = parse_method() 6660 if wrapped: 6661 self._match_r_paren() 6662 return parse_result 6663 6664 def _parse_expressions(self) -> t.List[exp.Expression]: 6665 return self._parse_csv(self._parse_expression) 6666 6667 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6668 return self._parse_select() or self._parse_set_operations( 6669 self._parse_expression() if alias else self._parse_assignment() 6670 ) 6671 6672 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6673 return self._parse_query_modifiers( 6674 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6675 ) 6676 6677 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6678 this = None 6679 if self._match_texts(self.TRANSACTION_KIND): 6680 this = self._prev.text 6681 6682 self._match_texts(("TRANSACTION", "WORK")) 6683 6684 modes = [] 6685 while True: 6686 mode = [] 6687 while self._match(TokenType.VAR): 6688 mode.append(self._prev.text) 6689 6690 if mode: 6691 modes.append(" ".join(mode)) 6692 if not self._match(TokenType.COMMA): 6693 break 6694 6695 return self.expression(exp.Transaction, this=this, modes=modes) 6696 6697 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6698 chain = None 6699 savepoint = None 6700 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6701 6702 self._match_texts(("TRANSACTION", "WORK")) 6703 6704 if self._match_text_seq("TO"): 6705 self._match_text_seq("SAVEPOINT") 6706 savepoint = self._parse_id_var() 6707 6708 if self._match(TokenType.AND): 6709 chain = not self._match_text_seq("NO") 6710 self._match_text_seq("CHAIN") 6711 6712 if is_rollback: 6713 return self.expression(exp.Rollback, savepoint=savepoint) 6714 6715 return self.expression(exp.Commit, chain=chain) 6716 6717 def _parse_refresh(self) -> exp.Refresh: 6718 self._match(TokenType.TABLE) 6719 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6720 6721 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6722 if not self._match_text_seq("ADD"): 6723 return None 6724 6725 self._match(TokenType.COLUMN) 6726 exists_column = self._parse_exists(not_=True) 6727 expression = self._parse_field_def() 6728 6729 if expression: 6730 expression.set("exists", exists_column) 6731 6732 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6733 if self._match_texts(("FIRST", "AFTER")): 6734 position = self._prev.text 6735 column_position = self.expression( 6736 exp.ColumnPosition, this=self._parse_column(), position=position 6737 ) 6738 expression.set("position", column_position) 6739 6740 return expression 6741 6742 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6743 drop = self._match(TokenType.DROP) and self._parse_drop() 6744 if drop and not isinstance(drop, exp.Command): 6745 drop.set("kind", drop.args.get("kind", "COLUMN")) 6746 return drop 6747 6748 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6749 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6750 return self.expression( 6751 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6752 ) 6753 6754 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6755 index = self._index - 1 6756 6757 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6758 return self._parse_csv( 6759 lambda: self.expression( 6760 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6761 ) 6762 ) 6763 6764 self._retreat(index) 6765 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6766 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6767 6768 if self._match_text_seq("ADD", "COLUMNS"): 6769 schema = self._parse_schema() 6770 if schema: 6771 return [schema] 6772 return [] 6773 6774 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6775 6776 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6777 if self._match_texts(self.ALTER_ALTER_PARSERS): 6778 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6779 6780 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6781 # keyword after ALTER we default to parsing this statement 6782 self._match(TokenType.COLUMN) 6783 column = self._parse_field(any_token=True) 6784 6785 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6786 return self.expression(exp.AlterColumn, this=column, drop=True) 6787 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6788 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6789 if self._match(TokenType.COMMENT): 6790 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6791 if self._match_text_seq("DROP", "NOT", "NULL"): 6792 return self.expression( 6793 exp.AlterColumn, 6794 this=column, 6795 drop=True, 6796 allow_null=True, 6797 ) 6798 if self._match_text_seq("SET", "NOT", "NULL"): 6799 return self.expression( 6800 exp.AlterColumn, 6801 this=column, 6802 allow_null=False, 6803 ) 6804 self._match_text_seq("SET", "DATA") 6805 self._match_text_seq("TYPE") 6806 return self.expression( 6807 exp.AlterColumn, 6808 this=column, 6809 dtype=self._parse_types(), 6810 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6811 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6812 ) 6813 6814 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6815 if self._match_texts(("ALL", "EVEN", "AUTO")): 6816 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6817 6818 self._match_text_seq("KEY", "DISTKEY") 6819 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6820 6821 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6822 if compound: 6823 self._match_text_seq("SORTKEY") 6824 6825 if self._match(TokenType.L_PAREN, advance=False): 6826 return self.expression( 6827 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6828 ) 6829 6830 self._match_texts(("AUTO", "NONE")) 6831 return self.expression( 6832 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6833 ) 6834 6835 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6836 index = self._index - 1 6837 6838 partition_exists = self._parse_exists() 6839 if self._match(TokenType.PARTITION, advance=False): 6840 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6841 6842 self._retreat(index) 6843 return self._parse_csv(self._parse_drop_column) 6844 6845 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6846 if self._match(TokenType.COLUMN): 6847 exists = self._parse_exists() 6848 old_column = self._parse_column() 6849 to = self._match_text_seq("TO") 6850 new_column = self._parse_column() 6851 6852 if old_column is None or to is None or new_column is None: 6853 return None 6854 6855 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6856 6857 self._match_text_seq("TO") 6858 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6859 6860 def _parse_alter_table_set(self) -> exp.AlterSet: 6861 alter_set = self.expression(exp.AlterSet) 6862 6863 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6864 "TABLE", "PROPERTIES" 6865 ): 6866 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6867 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6868 alter_set.set("expressions", [self._parse_assignment()]) 6869 elif self._match_texts(("LOGGED", "UNLOGGED")): 6870 alter_set.set("option", exp.var(self._prev.text.upper())) 6871 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6872 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6873 elif self._match_text_seq("LOCATION"): 6874 alter_set.set("location", self._parse_field()) 6875 elif self._match_text_seq("ACCESS", "METHOD"): 6876 alter_set.set("access_method", self._parse_field()) 6877 elif self._match_text_seq("TABLESPACE"): 6878 alter_set.set("tablespace", self._parse_field()) 6879 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6880 alter_set.set("file_format", [self._parse_field()]) 6881 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6882 alter_set.set("file_format", self._parse_wrapped_options()) 6883 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6884 alter_set.set("copy_options", self._parse_wrapped_options()) 6885 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6886 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6887 else: 6888 if self._match_text_seq("SERDE"): 6889 alter_set.set("serde", self._parse_field()) 6890 6891 alter_set.set("expressions", [self._parse_properties()]) 6892 6893 return 
alter_set 6894 6895 def _parse_alter(self) -> exp.Alter | exp.Command: 6896 start = self._prev 6897 6898 alter_token = self._match_set(self.ALTERABLES) and self._prev 6899 if not alter_token: 6900 return self._parse_as_command(start) 6901 6902 exists = self._parse_exists() 6903 only = self._match_text_seq("ONLY") 6904 this = self._parse_table(schema=True) 6905 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6906 6907 if self._next: 6908 self._advance() 6909 6910 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6911 if parser: 6912 actions = ensure_list(parser(self)) 6913 not_valid = self._match_text_seq("NOT", "VALID") 6914 options = self._parse_csv(self._parse_property) 6915 6916 if not self._curr and actions: 6917 return self.expression( 6918 exp.Alter, 6919 this=this, 6920 kind=alter_token.text.upper(), 6921 exists=exists, 6922 actions=actions, 6923 only=only, 6924 options=options, 6925 cluster=cluster, 6926 not_valid=not_valid, 6927 ) 6928 6929 return self._parse_as_command(start) 6930 6931 def _parse_merge(self) -> exp.Merge: 6932 self._match(TokenType.INTO) 6933 target = self._parse_table() 6934 6935 if target and self._match(TokenType.ALIAS, advance=False): 6936 target.set("alias", self._parse_table_alias()) 6937 6938 self._match(TokenType.USING) 6939 using = self._parse_table() 6940 6941 self._match(TokenType.ON) 6942 on = self._parse_assignment() 6943 6944 return self.expression( 6945 exp.Merge, 6946 this=target, 6947 using=using, 6948 on=on, 6949 expressions=self._parse_when_matched(), 6950 returning=self._parse_returning(), 6951 ) 6952 6953 def _parse_when_matched(self) -> t.List[exp.When]: 6954 whens = [] 6955 6956 while self._match(TokenType.WHEN): 6957 matched = not self._match(TokenType.NOT) 6958 self._match_text_seq("MATCHED") 6959 source = ( 6960 False 6961 if self._match_text_seq("BY", "TARGET") 6962 else self._match_text_seq("BY", "SOURCE") 6963 ) 6964 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6965 6966 self._match(TokenType.THEN) 6967 6968 if self._match(TokenType.INSERT): 6969 this = self._parse_star() 6970 if this: 6971 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6972 else: 6973 then = self.expression( 6974 exp.Insert, 6975 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6976 expression=self._match_text_seq("VALUES") and self._parse_value(), 6977 ) 6978 elif self._match(TokenType.UPDATE): 6979 expressions = self._parse_star() 6980 if expressions: 6981 then = self.expression(exp.Update, expressions=expressions) 6982 else: 6983 then = self.expression( 6984 exp.Update, 6985 expressions=self._match(TokenType.SET) 6986 and self._parse_csv(self._parse_equality), 6987 ) 6988 elif self._match(TokenType.DELETE): 6989 then = self.expression(exp.Var, this=self._prev.text) 6990 else: 6991 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6992 6993 whens.append( 6994 self.expression( 6995 exp.When, 6996 matched=matched, 6997 source=source, 6998 condition=condition, 6999 then=then, 7000 ) 7001 ) 7002 return whens 7003 7004 def _parse_show(self) -> t.Optional[exp.Expression]: 7005 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7006 if parser: 7007 return parser(self) 7008 return self._parse_as_command(self._prev) 7009 7010 def _parse_set_item_assignment( 7011 self, kind: t.Optional[str] = None 7012 ) -> t.Optional[exp.Expression]: 7013 index = self._index 7014 7015 if kind in ("GLOBAL", "SESSION") and 
self._match_text_seq("TRANSACTION"): 7016 return self._parse_set_transaction(global_=kind == "GLOBAL") 7017 7018 left = self._parse_primary() or self._parse_column() 7019 assignment_delimiter = self._match_texts(("=", "TO")) 7020 7021 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7022 self._retreat(index) 7023 return None 7024 7025 right = self._parse_statement() or self._parse_id_var() 7026 if isinstance(right, (exp.Column, exp.Identifier)): 7027 right = exp.var(right.name) 7028 7029 this = self.expression(exp.EQ, this=left, expression=right) 7030 return self.expression(exp.SetItem, this=this, kind=kind) 7031 7032 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7033 self._match_text_seq("TRANSACTION") 7034 characteristics = self._parse_csv( 7035 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7036 ) 7037 return self.expression( 7038 exp.SetItem, 7039 expressions=characteristics, 7040 kind="TRANSACTION", 7041 **{"global": global_}, # type: ignore 7042 ) 7043 7044 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7045 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7046 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7047 7048 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7049 index = self._index 7050 set_ = self.expression( 7051 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7052 ) 7053 7054 if self._curr: 7055 self._retreat(index) 7056 return self._parse_as_command(self._prev) 7057 7058 return set_ 7059 7060 def _parse_var_from_options( 7061 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7062 ) -> t.Optional[exp.Var]: 7063 start = self._curr 7064 if not start: 7065 return None 7066 7067 option = start.text.upper() 7068 continuations = options.get(option) 7069 7070 index = self._index 7071 self._advance() 7072 for keywords in continuations or []: 7073 if isinstance(keywords, str): 7074 keywords = (keywords,) 7075 7076 if self._match_text_seq(*keywords): 7077 option = f"{option} {' '.join(keywords)}" 7078 break 7079 else: 7080 if continuations or continuations is None: 7081 if raise_unmatched: 7082 self.raise_error(f"Unknown option {option}") 7083 7084 self._retreat(index) 7085 return None 7086 7087 return exp.var(option) 7088 7089 def _parse_as_command(self, start: Token) -> exp.Command: 7090 while self._curr: 7091 self._advance() 7092 text = self._find_sql(start, self._prev) 7093 size = len(start.text) 7094 self._warn_unsupported() 7095 return exp.Command(this=text[:size], expression=text[size:]) 7096 7097 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7098 settings = [] 7099 7100 self._match_l_paren() 7101 kind = self._parse_id_var() 7102 7103 if self._match(TokenType.L_PAREN): 7104 while True: 7105 key = self._parse_id_var() 7106 value = self._parse_primary() 7107 7108 if not key and value is None: 7109 break 7110 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7111 self._match(TokenType.R_PAREN) 7112 7113 self._match_r_paren() 7114 7115 return self.expression( 7116 exp.DictProperty, 7117 this=this, 7118 kind=kind.this if kind else None, 7119 settings=settings, 7120 ) 7121 7122 def _parse_dict_range(self, this: str) -> exp.DictRange: 7123 self._match_l_paren() 7124 has_min = self._match_text_seq("MIN") 7125 if has_min: 7126 min = self._parse_var() or self._parse_primary() 7127 self._match_text_seq("MAX") 7128 max = 
self._parse_var() or self._parse_primary() 7129 else: 7130 max = self._parse_var() or self._parse_primary() 7131 min = exp.Literal.number(0) 7132 self._match_r_paren() 7133 return self.expression(exp.DictRange, this=this, min=min, max=max) 7134 7135 def _parse_comprehension( 7136 self, this: t.Optional[exp.Expression] 7137 ) -> t.Optional[exp.Comprehension]: 7138 index = self._index 7139 expression = self._parse_column() 7140 if not self._match(TokenType.IN): 7141 self._retreat(index - 1) 7142 return None 7143 iterator = self._parse_column() 7144 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7145 return self.expression( 7146 exp.Comprehension, 7147 this=this, 7148 expression=expression, 7149 iterator=iterator, 7150 condition=condition, 7151 ) 7152 7153 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7154 if self._match(TokenType.HEREDOC_STRING): 7155 return self.expression(exp.Heredoc, this=self._prev.text) 7156 7157 if not self._match_text_seq("$"): 7158 return None 7159 7160 tags = ["$"] 7161 tag_text = None 7162 7163 if self._is_connected(): 7164 self._advance() 7165 tags.append(self._prev.text.upper()) 7166 else: 7167 self.raise_error("No closing $ found") 7168 7169 if tags[-1] != "$": 7170 if self._is_connected() and self._match_text_seq("$"): 7171 tag_text = tags[-1] 7172 tags.append("$") 7173 else: 7174 self.raise_error("No closing $ found") 7175 7176 heredoc_start = self._curr 7177 7178 while self._curr: 7179 if self._match_text_seq(*tags, advance=False): 7180 this = self._find_sql(heredoc_start, self._prev) 7181 self._advance(len(tags)) 7182 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7183 7184 self._advance() 7185 7186 self.raise_error(f"No closing {''.join(tags)} found") 7187 return None 7188 7189 def _find_parser( 7190 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7191 ) -> t.Optional[t.Callable]: 7192 if not self._curr: 7193 return None 7194 7195 index = self._index 7196 this = [] 7197 while True: 7198 # The current token might be multiple words 7199 curr = self._curr.text.upper() 7200 key = curr.split(" ") 7201 this.append(curr) 7202 7203 self._advance() 7204 result, trie = in_trie(trie, key) 7205 if result == TrieResult.FAILED: 7206 break 7207 7208 if result == TrieResult.EXISTS: 7209 subparser = parsers[" ".join(this)] 7210 return subparser 7211 7212 self._retreat(index) 7213 return None 7214 7215 def _match(self, token_type, advance=True, expression=None): 7216 if not self._curr: 7217 return None 7218 7219 if self._curr.token_type == token_type: 7220 if advance: 7221 self._advance() 7222 self._add_comments(expression) 7223 return True 7224 7225 return None 7226 7227 def _match_set(self, types, advance=True): 7228 if not self._curr: 7229 return None 7230 7231 if self._curr.token_type in types: 7232 if advance: 7233 self._advance() 7234 return True 7235 7236 return None 7237 7238 def _match_pair(self, token_type_a, token_type_b, advance=True): 7239 if not self._curr or not self._next: 7240 return None 7241 7242 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7243 if advance: 7244 self._advance(2) 7245 return True 7246 7247 return None 7248 7249 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7250 if not self._match(TokenType.L_PAREN, expression=expression): 7251 self.raise_error("Expecting (") 7252 7253 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7254 if not self._match(TokenType.R_PAREN, expression=expression): 
7255 self.raise_error("Expecting )") 7256 7257 def _match_texts(self, texts, advance=True): 7258 if ( 7259 self._curr 7260 and self._curr.token_type != TokenType.STRING 7261 and self._curr.text.upper() in texts 7262 ): 7263 if advance: 7264 self._advance() 7265 return True 7266 return None 7267 7268 def _match_text_seq(self, *texts, advance=True): 7269 index = self._index 7270 for text in texts: 7271 if ( 7272 self._curr 7273 and self._curr.token_type != TokenType.STRING 7274 and self._curr.text.upper() == text 7275 ): 7276 self._advance() 7277 else: 7278 self._retreat(index) 7279 return None 7280 7281 if not advance: 7282 self._retreat(index) 7283 7284 return True 7285 7286 def _replace_lambda( 7287 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7288 ) -> t.Optional[exp.Expression]: 7289 if not node: 7290 return node 7291 7292 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7293 7294 for column in node.find_all(exp.Column): 7295 typ = lambda_types.get(column.parts[0].name) 7296 if typ is not None: 7297 dot_or_id = column.to_dot() if column.table else column.this 7298 7299 if typ: 7300 dot_or_id = self.expression( 7301 exp.Cast, 7302 this=dot_or_id, 7303 to=typ, 7304 ) 7305 7306 parent = column.parent 7307 7308 while isinstance(parent, exp.Dot): 7309 if not isinstance(parent.parent, exp.Dot): 7310 parent.replace(dot_or_id) 7311 break 7312 parent = parent.parent 7313 else: 7314 if column is node: 7315 node = dot_or_id 7316 else: 7317 column.replace(dot_or_id) 7318 return node 7319 7320 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7321 start = self._prev 7322 7323 # Not to be confused with TRUNCATE(number, decimals) function call 7324 if self._match(TokenType.L_PAREN): 7325 self._retreat(self._index - 2) 7326 return self._parse_function() 7327 7328 # Clickhouse supports TRUNCATE DATABASE as well 7329 is_database = self._match(TokenType.DATABASE) 7330 7331 self._match(TokenType.TABLE) 7332 7333 exists = self._parse_exists(not_=False) 7334 7335 expressions = self._parse_csv( 7336 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7337 ) 7338 7339 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7340 7341 if self._match_text_seq("RESTART", "IDENTITY"): 7342 identity = "RESTART" 7343 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7344 identity = "CONTINUE" 7345 else: 7346 identity = None 7347 7348 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7349 option = self._prev.text 7350 else: 7351 option = None 7352 7353 partition = self._parse_partition() 7354 7355 # Fallback case 7356 if self._curr: 7357 return self._parse_as_command(start) 7358 7359 return self.expression( 7360 exp.TruncateTable, 7361 expressions=expressions, 7362 is_database=is_database, 7363 exists=exists, 7364 cluster=cluster, 7365 identity=identity, 7366 option=option, 7367 partition=partition, 7368 ) 7369 7370 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7371 this = self._parse_ordered(self._parse_opclass) 7372 7373 if not self._match(TokenType.WITH): 7374 return this 7375 7376 op = self._parse_var(any_token=True) 7377 7378 return self.expression(exp.WithOperator, this=this, op=op) 7379 7380 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7381 self._match(TokenType.EQ) 7382 self._match(TokenType.L_PAREN) 7383 7384 opts: t.List[t.Optional[exp.Expression]] = [] 7385 while self._curr and not self._match(TokenType.R_PAREN): 7386 if 
self._match_text_seq("FORMAT_NAME", "="): 7387 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7388 # so we parse it separately to use _parse_field() 7389 prop = self.expression( 7390 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7391 ) 7392 opts.append(prop) 7393 else: 7394 opts.append(self._parse_property()) 7395 7396 self._match(TokenType.COMMA) 7397 7398 return opts 7399 7400 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7401 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7402 7403 options = [] 7404 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7405 option = self._parse_var(any_token=True) 7406 prev = self._prev.text.upper() 7407 7408 # Different dialects might separate options and values by white space, "=" and "AS" 7409 self._match(TokenType.EQ) 7410 self._match(TokenType.ALIAS) 7411 7412 param = self.expression(exp.CopyParameter, this=option) 7413 7414 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7415 TokenType.L_PAREN, advance=False 7416 ): 7417 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7418 param.set("expressions", self._parse_wrapped_options()) 7419 elif prev == "FILE_FORMAT": 7420 # T-SQL's external file format case 7421 param.set("expression", self._parse_field()) 7422 else: 7423 param.set("expression", self._parse_unquoted_field()) 7424 7425 options.append(param) 7426 self._match(sep) 7427 7428 return options 7429 7430 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7431 expr = self.expression(exp.Credentials) 7432 7433 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7434 expr.set("storage", self._parse_field()) 7435 if self._match_text_seq("CREDENTIALS"): 7436 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7437 creds = ( 7438 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7439 ) 7440 expr.set("credentials", creds) 7441 if self._match_text_seq("ENCRYPTION"): 7442 expr.set("encryption", self._parse_wrapped_options()) 7443 if self._match_text_seq("IAM_ROLE"): 7444 expr.set("iam_role", self._parse_field()) 7445 if self._match_text_seq("REGION"): 7446 expr.set("region", self._parse_field()) 7447 7448 return expr 7449 7450 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7451 return self._parse_field() 7452 7453 def _parse_copy(self) -> exp.Copy | exp.Command: 7454 start = self._prev 7455 7456 self._match(TokenType.INTO) 7457 7458 this = ( 7459 self._parse_select(nested=True, parse_subquery_alias=False) 7460 if self._match(TokenType.L_PAREN, advance=False) 7461 else self._parse_table(schema=True) 7462 ) 7463 7464 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7465 7466 files = self._parse_csv(self._parse_file_location) 7467 credentials = self._parse_credentials() 7468 7469 self._match_text_seq("WITH") 7470 7471 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7472 7473 # Fallback case 7474 if self._curr: 7475 return self._parse_as_command(start) 7476 7477 return self.expression( 7478 exp.Copy, 7479 this=this, 7480 kind=kind, 7481 credentials=credentials, 7482 files=files, 7483 params=params, 7484 ) 7485 7486 def _parse_normalize(self) -> exp.Normalize: 7487 return self.expression( 7488 exp.Normalize, 7489 this=self._parse_bitwise(), 7490 form=self._match(TokenType.COMMA) and self._parse_var(), 7491 ) 7492 7493 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7494 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7495 this = self._parse_function() 7496 if isinstance(this, exp.Columns): 7497 this.set("unpack", True) 7498 return this 7499 7500 return self.expression( 7501 exp.Star, 7502 **{ # type: ignore 7503 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7504 "replace": self._parse_star_op("REPLACE"), 7505 "rename": self._parse_star_op("RENAME"), 7506 }, 7507 ) 7508 7509 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7510 privilege_parts = [] 7511 7512 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7513 # (end of privilege list) or L_PAREN (start of column list) are met 7514 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7515 privilege_parts.append(self._curr.text.upper()) 7516 self._advance() 7517 7518 this = exp.var(" ".join(privilege_parts)) 7519 expressions = ( 7520 self._parse_wrapped_csv(self._parse_column) 7521 if self._match(TokenType.L_PAREN, advance=False) 7522 else None 7523 ) 7524 7525 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7526 7527 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7528 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7529 principal = self._parse_id_var() 7530 7531 if not principal: 7532 return None 7533 7534 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7535 7536 def _parse_grant(self) -> exp.Grant | exp.Command: 7537 start = self._prev 7538 7539 privileges = self._parse_csv(self._parse_grant_privilege) 7540 7541 self._match(TokenType.ON) 7542 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7543 7544 # Attempt to parse the securable e.g. MySQL allows names 7545 # such as "foo.*", "*.*" which are not easily parseable yet 7546 securable = self._try_parse(self._parse_table_parts) 7547 7548 if not securable or not self._match_text_seq("TO"): 7549 return self._parse_as_command(start) 7550 7551 principals = self._parse_csv(self._parse_grant_principal) 7552 7553 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7554 7555 if self._curr: 7556 return self._parse_as_command(start) 7557 7558 return self.expression( 7559 exp.Grant, 7560 privileges=privileges, 7561 kind=kind, 7562 securable=securable, 7563 principals=principals, 7564 grant_option=grant_option, 7565 ) 7566 7567 def _parse_overlay(self) -> exp.Overlay: 7568 return self.expression( 7569 exp.Overlay, 7570 **{ # type: ignore 7571 "this": self._parse_bitwise(), 7572 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7573 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7574 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7575 }, 7576 )
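_parse_csv and _parse_wrapped_csv back most comma-separated constructs, so list-like clauses surface as plain Python lists on the parsed tree. A small sketch using the public API:

    import sqlglot

    select = sqlglot.parse_one("SELECT a, b, c FROM t")
    # The projection list was gathered by _parse_csv over expression parsing
    print([col.sql() for col in select.expressions])  # ['a', 'b', 'c']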
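_parse_merge and _parse_when_matched turn each WHEN branch into a When node; in the version documented here they are stored under the Merge node's "expressions" arg. A sketch:

    import sqlglot

    merge = sqlglot.parse_one(
        "MERGE INTO t USING s ON t.id = s.id "
        "WHEN MATCHED THEN UPDATE SET t.v = s.v "
        "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    )
    print(len(merge.expressions))  # 2 When nodes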
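_parse_alter dispatches on the keyword that follows the table name via ALTER_PARSERS, and falls back to an opaque Command when it cannot consume the whole statement. A sketch of the ADD COLUMN path:

    import sqlglot

    alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
    print(alter.args["kind"])     # 'TABLE'
    print(alter.args["actions"])  # [ColumnDef for c INT], via _parse_alter_table_add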
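_parse_grant collects privileges, the securable, and the principals, again retreating to a Command for shapes it cannot fully parse (e.g. MySQL's "*.*" securables). A sketch; the round-tripped SQL in the comment is what the default dialect is expected to emit:

    import sqlglot

    grant = sqlglot.parse_one("GRANT SELECT ON TABLE t TO admin")
    print(grant.sql())  # GRANT SELECT ON TABLE t TO admin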
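build_mod's parenthesization matters once the function form is rendered as the % operator, since % binds tighter than +. A sketch on the default dialect:

    import sqlglot

    # Without the Paren wrapper this would transpile to a + 1 % 7
    print(sqlglot.transpile("SELECT MOD(a + 1, 7)")[0])  # SELECT (a + 1) % 7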
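_parse_truncate_table records RESTART/CONTINUE IDENTITY and CASCADE/RESTRICT as plain string args, and like the other statement parsers retries as a Command if tokens are left over. A sketch:

    import sqlglot

    trunc = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY")
    print(trunc.args["identity"])  # RESTART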
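The class-level tables below drive expression parsing: FUNCTIONS normalizes function names to canonical nodes (COALESCE, IFNULL and NVL all become Coalesce), and COLUMN_OPERATORS maps '::' to a Cast when STRICT_CAST is set. A sketch:

    import sqlglot

    print(type(sqlglot.parse_one("SELECT IFNULL(x, 0)").expressions[0]).__name__)  # Coalesce
    print(type(sqlglot.parse_one("SELECT x::INT").expressions[0]).__name__)        # Cast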
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 217 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 218 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 219 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 220 "LIKE": build_like, 221 "LOG": build_logarithm, 222 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 223 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 224 "LOWER": build_lower, 225 "LPAD": lambda args: build_pad(args), 226 "LEFTPAD": lambda args: build_pad(args), 227 "LTRIM": lambda args: build_trim(args), 228 "MOD": build_mod, 229 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 230 "RPAD": lambda args: build_pad(args, is_left=False), 231 "RTRIM": lambda args: build_trim(args, is_left=False), 232 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 233 if len(args) != 2 234 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 235 "TIME_TO_TIME_STR": lambda args: exp.Cast( 236 this=seq_get(args, 0), 237 to=exp.DataType(this=exp.DataType.Type.TEXT), 238 ), 239 "TO_HEX": build_hex, 240 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 241 this=exp.Cast( 242 this=seq_get(args, 0), 243 
to=exp.DataType(this=exp.DataType.Type.TEXT), 244 ), 245 start=exp.Literal.number(1), 246 length=exp.Literal.number(10), 247 ), 248 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 249 "UPPER": build_upper, 250 "VAR_MAP": build_var_map, 251 } 252 253 NO_PAREN_FUNCTIONS = { 254 TokenType.CURRENT_DATE: exp.CurrentDate, 255 TokenType.CURRENT_DATETIME: exp.CurrentDate, 256 TokenType.CURRENT_TIME: exp.CurrentTime, 257 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 258 TokenType.CURRENT_USER: exp.CurrentUser, 259 } 260 261 STRUCT_TYPE_TOKENS = { 262 TokenType.NESTED, 263 TokenType.OBJECT, 264 TokenType.STRUCT, 265 TokenType.UNION, 266 } 267 268 NESTED_TYPE_TOKENS = { 269 TokenType.ARRAY, 270 TokenType.LIST, 271 TokenType.LOWCARDINALITY, 272 TokenType.MAP, 273 TokenType.NULLABLE, 274 TokenType.RANGE, 275 *STRUCT_TYPE_TOKENS, 276 } 277 278 ENUM_TYPE_TOKENS = { 279 TokenType.ENUM, 280 TokenType.ENUM8, 281 TokenType.ENUM16, 282 } 283 284 AGGREGATE_TYPE_TOKENS = { 285 TokenType.AGGREGATEFUNCTION, 286 TokenType.SIMPLEAGGREGATEFUNCTION, 287 } 288 289 TYPE_TOKENS = { 290 TokenType.BIT, 291 TokenType.BOOLEAN, 292 TokenType.TINYINT, 293 TokenType.UTINYINT, 294 TokenType.SMALLINT, 295 TokenType.USMALLINT, 296 TokenType.INT, 297 TokenType.UINT, 298 TokenType.BIGINT, 299 TokenType.UBIGINT, 300 TokenType.INT128, 301 TokenType.UINT128, 302 TokenType.INT256, 303 TokenType.UINT256, 304 TokenType.MEDIUMINT, 305 TokenType.UMEDIUMINT, 306 TokenType.FIXEDSTRING, 307 TokenType.FLOAT, 308 TokenType.DOUBLE, 309 TokenType.CHAR, 310 TokenType.NCHAR, 311 TokenType.VARCHAR, 312 TokenType.NVARCHAR, 313 TokenType.BPCHAR, 314 TokenType.TEXT, 315 TokenType.MEDIUMTEXT, 316 TokenType.LONGTEXT, 317 TokenType.MEDIUMBLOB, 318 TokenType.LONGBLOB, 319 TokenType.BINARY, 320 TokenType.VARBINARY, 321 TokenType.JSON, 322 TokenType.JSONB, 323 TokenType.INTERVAL, 324 TokenType.TINYBLOB, 325 TokenType.TINYTEXT, 326 TokenType.TIME, 327 TokenType.TIMETZ, 328 TokenType.TIMESTAMP, 329 TokenType.TIMESTAMP_S, 330 TokenType.TIMESTAMP_MS, 331 TokenType.TIMESTAMP_NS, 332 TokenType.TIMESTAMPTZ, 333 TokenType.TIMESTAMPLTZ, 334 TokenType.TIMESTAMPNTZ, 335 TokenType.DATETIME, 336 TokenType.DATETIME64, 337 TokenType.DATE, 338 TokenType.DATE32, 339 TokenType.INT4RANGE, 340 TokenType.INT4MULTIRANGE, 341 TokenType.INT8RANGE, 342 TokenType.INT8MULTIRANGE, 343 TokenType.NUMRANGE, 344 TokenType.NUMMULTIRANGE, 345 TokenType.TSRANGE, 346 TokenType.TSMULTIRANGE, 347 TokenType.TSTZRANGE, 348 TokenType.TSTZMULTIRANGE, 349 TokenType.DATERANGE, 350 TokenType.DATEMULTIRANGE, 351 TokenType.DECIMAL, 352 TokenType.DECIMAL32, 353 TokenType.DECIMAL64, 354 TokenType.DECIMAL128, 355 TokenType.DECIMAL256, 356 TokenType.UDECIMAL, 357 TokenType.BIGDECIMAL, 358 TokenType.UUID, 359 TokenType.GEOGRAPHY, 360 TokenType.GEOMETRY, 361 TokenType.POINT, 362 TokenType.RING, 363 TokenType.LINESTRING, 364 TokenType.MULTILINESTRING, 365 TokenType.POLYGON, 366 TokenType.MULTIPOLYGON, 367 TokenType.HLLSKETCH, 368 TokenType.HSTORE, 369 TokenType.PSEUDO_TYPE, 370 TokenType.SUPER, 371 TokenType.SERIAL, 372 TokenType.SMALLSERIAL, 373 TokenType.BIGSERIAL, 374 TokenType.XML, 375 TokenType.YEAR, 376 TokenType.UNIQUEIDENTIFIER, 377 TokenType.USERDEFINED, 378 TokenType.MONEY, 379 TokenType.SMALLMONEY, 380 TokenType.ROWVERSION, 381 TokenType.IMAGE, 382 TokenType.VARIANT, 383 TokenType.VECTOR, 384 TokenType.OBJECT, 385 TokenType.OBJECT_IDENTIFIER, 386 TokenType.INET, 387 TokenType.IPADDRESS, 388 TokenType.IPPREFIX, 389 TokenType.IPV4, 390 TokenType.IPV6, 391 
TokenType.UNKNOWN, 392 TokenType.NULL, 393 TokenType.NAME, 394 TokenType.TDIGEST, 395 *ENUM_TYPE_TOKENS, 396 *NESTED_TYPE_TOKENS, 397 *AGGREGATE_TYPE_TOKENS, 398 } 399 400 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 401 TokenType.BIGINT: TokenType.UBIGINT, 402 TokenType.INT: TokenType.UINT, 403 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 404 TokenType.SMALLINT: TokenType.USMALLINT, 405 TokenType.TINYINT: TokenType.UTINYINT, 406 TokenType.DECIMAL: TokenType.UDECIMAL, 407 } 408 409 SUBQUERY_PREDICATES = { 410 TokenType.ANY: exp.Any, 411 TokenType.ALL: exp.All, 412 TokenType.EXISTS: exp.Exists, 413 TokenType.SOME: exp.Any, 414 } 415 416 RESERVED_TOKENS = { 417 *Tokenizer.SINGLE_TOKENS.values(), 418 TokenType.SELECT, 419 } - {TokenType.IDENTIFIER} 420 421 DB_CREATABLES = { 422 TokenType.DATABASE, 423 TokenType.DICTIONARY, 424 TokenType.MODEL, 425 TokenType.SCHEMA, 426 TokenType.SEQUENCE, 427 TokenType.STORAGE_INTEGRATION, 428 TokenType.TABLE, 429 TokenType.TAG, 430 TokenType.VIEW, 431 TokenType.WAREHOUSE, 432 TokenType.STREAMLIT, 433 } 434 435 CREATABLES = { 436 TokenType.COLUMN, 437 TokenType.CONSTRAINT, 438 TokenType.FOREIGN_KEY, 439 TokenType.FUNCTION, 440 TokenType.INDEX, 441 TokenType.PROCEDURE, 442 *DB_CREATABLES, 443 } 444 445 ALTERABLES = { 446 TokenType.INDEX, 447 TokenType.TABLE, 448 TokenType.VIEW, 449 } 450 451 # Tokens that can represent identifiers 452 ID_VAR_TOKENS = { 453 TokenType.ALL, 454 TokenType.VAR, 455 TokenType.ANTI, 456 TokenType.APPLY, 457 TokenType.ASC, 458 TokenType.ASOF, 459 TokenType.AUTO_INCREMENT, 460 TokenType.BEGIN, 461 TokenType.BPCHAR, 462 TokenType.CACHE, 463 TokenType.CASE, 464 TokenType.COLLATE, 465 TokenType.COMMAND, 466 TokenType.COMMENT, 467 TokenType.COMMIT, 468 TokenType.CONSTRAINT, 469 TokenType.COPY, 470 TokenType.CUBE, 471 TokenType.DEFAULT, 472 TokenType.DELETE, 473 TokenType.DESC, 474 TokenType.DESCRIBE, 475 TokenType.DICTIONARY, 476 TokenType.DIV, 477 TokenType.END, 478 TokenType.EXECUTE, 479 TokenType.ESCAPE, 480 TokenType.FALSE, 481 TokenType.FIRST, 482 TokenType.FILTER, 483 TokenType.FINAL, 484 TokenType.FORMAT, 485 TokenType.FULL, 486 TokenType.IDENTIFIER, 487 TokenType.IS, 488 TokenType.ISNULL, 489 TokenType.INTERVAL, 490 TokenType.KEEP, 491 TokenType.KILL, 492 TokenType.LEFT, 493 TokenType.LOAD, 494 TokenType.MERGE, 495 TokenType.NATURAL, 496 TokenType.NEXT, 497 TokenType.OFFSET, 498 TokenType.OPERATOR, 499 TokenType.ORDINALITY, 500 TokenType.OVERLAPS, 501 TokenType.OVERWRITE, 502 TokenType.PARTITION, 503 TokenType.PERCENT, 504 TokenType.PIVOT, 505 TokenType.PRAGMA, 506 TokenType.RANGE, 507 TokenType.RECURSIVE, 508 TokenType.REFERENCES, 509 TokenType.REFRESH, 510 TokenType.RENAME, 511 TokenType.REPLACE, 512 TokenType.RIGHT, 513 TokenType.ROLLUP, 514 TokenType.ROW, 515 TokenType.ROWS, 516 TokenType.SEMI, 517 TokenType.SET, 518 TokenType.SETTINGS, 519 TokenType.SHOW, 520 TokenType.TEMPORARY, 521 TokenType.TOP, 522 TokenType.TRUE, 523 TokenType.TRUNCATE, 524 TokenType.UNIQUE, 525 TokenType.UNNEST, 526 TokenType.UNPIVOT, 527 TokenType.UPDATE, 528 TokenType.USE, 529 TokenType.VOLATILE, 530 TokenType.WINDOW, 531 *CREATABLES, 532 *SUBQUERY_PREDICATES, 533 *TYPE_TOKENS, 534 *NO_PAREN_FUNCTIONS, 535 } 536 ID_VAR_TOKENS.remove(TokenType.UNION) 537 538 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 539 540 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 541 TokenType.ANTI, 542 TokenType.APPLY, 543 TokenType.ASOF, 544 TokenType.FULL, 545 TokenType.LEFT, 546 TokenType.LOCK, 547 TokenType.NATURAL, 548 TokenType.OFFSET, 549 TokenType.RIGHT, 550 TokenType.SEMI, 551 
TokenType.WINDOW, 552 } 553 554 ALIAS_TOKENS = ID_VAR_TOKENS 555 556 ARRAY_CONSTRUCTORS = { 557 "ARRAY": exp.Array, 558 "LIST": exp.List, 559 } 560 561 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 562 563 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 564 565 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 566 567 FUNC_TOKENS = { 568 TokenType.COLLATE, 569 TokenType.COMMAND, 570 TokenType.CURRENT_DATE, 571 TokenType.CURRENT_DATETIME, 572 TokenType.CURRENT_TIMESTAMP, 573 TokenType.CURRENT_TIME, 574 TokenType.CURRENT_USER, 575 TokenType.FILTER, 576 TokenType.FIRST, 577 TokenType.FORMAT, 578 TokenType.GLOB, 579 TokenType.IDENTIFIER, 580 TokenType.INDEX, 581 TokenType.ISNULL, 582 TokenType.ILIKE, 583 TokenType.INSERT, 584 TokenType.LIKE, 585 TokenType.MERGE, 586 TokenType.OFFSET, 587 TokenType.PRIMARY_KEY, 588 TokenType.RANGE, 589 TokenType.REPLACE, 590 TokenType.RLIKE, 591 TokenType.ROW, 592 TokenType.UNNEST, 593 TokenType.VAR, 594 TokenType.LEFT, 595 TokenType.RIGHT, 596 TokenType.SEQUENCE, 597 TokenType.DATE, 598 TokenType.DATETIME, 599 TokenType.TABLE, 600 TokenType.TIMESTAMP, 601 TokenType.TIMESTAMPTZ, 602 TokenType.TRUNCATE, 603 TokenType.WINDOW, 604 TokenType.XOR, 605 *TYPE_TOKENS, 606 *SUBQUERY_PREDICATES, 607 } 608 609 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 610 TokenType.AND: exp.And, 611 } 612 613 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 614 TokenType.COLON_EQ: exp.PropertyEQ, 615 } 616 617 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 618 TokenType.OR: exp.Or, 619 } 620 621 EQUALITY = { 622 TokenType.EQ: exp.EQ, 623 TokenType.NEQ: exp.NEQ, 624 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 625 } 626 627 COMPARISON = { 628 TokenType.GT: exp.GT, 629 TokenType.GTE: exp.GTE, 630 TokenType.LT: exp.LT, 631 TokenType.LTE: exp.LTE, 632 } 633 634 BITWISE = { 635 TokenType.AMP: exp.BitwiseAnd, 636 TokenType.CARET: exp.BitwiseXor, 637 TokenType.PIPE: exp.BitwiseOr, 638 } 639 640 TERM = { 641 TokenType.DASH: exp.Sub, 642 TokenType.PLUS: exp.Add, 643 TokenType.MOD: exp.Mod, 644 TokenType.COLLATE: exp.Collate, 645 } 646 647 FACTOR = { 648 TokenType.DIV: exp.IntDiv, 649 TokenType.LR_ARROW: exp.Distance, 650 TokenType.SLASH: exp.Div, 651 TokenType.STAR: exp.Mul, 652 } 653 654 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 655 656 TIMES = { 657 TokenType.TIME, 658 TokenType.TIMETZ, 659 } 660 661 TIMESTAMPS = { 662 TokenType.TIMESTAMP, 663 TokenType.TIMESTAMPTZ, 664 TokenType.TIMESTAMPLTZ, 665 *TIMES, 666 } 667 668 SET_OPERATIONS = { 669 TokenType.UNION, 670 TokenType.INTERSECT, 671 TokenType.EXCEPT, 672 } 673 674 JOIN_METHODS = { 675 TokenType.ASOF, 676 TokenType.NATURAL, 677 TokenType.POSITIONAL, 678 } 679 680 JOIN_SIDES = { 681 TokenType.LEFT, 682 TokenType.RIGHT, 683 TokenType.FULL, 684 } 685 686 JOIN_KINDS = { 687 TokenType.ANTI, 688 TokenType.CROSS, 689 TokenType.INNER, 690 TokenType.OUTER, 691 TokenType.SEMI, 692 TokenType.STRAIGHT_JOIN, 693 } 694 695 JOIN_HINTS: t.Set[str] = set() 696 697 LAMBDAS = { 698 TokenType.ARROW: lambda self, expressions: self.expression( 699 exp.Lambda, 700 this=self._replace_lambda( 701 self._parse_assignment(), 702 expressions, 703 ), 704 expressions=expressions, 705 ), 706 TokenType.FARROW: lambda self, expressions: self.expression( 707 exp.Kwarg, 708 this=exp.var(expressions[0].name), 709 expression=self._parse_assignment(), 710 ), 711 } 712 713 COLUMN_OPERATORS = { 714 TokenType.DOT: None, 715 TokenType.DCOLON: lambda self, this, to: self.expression( 716 exp.Cast if self.STRICT_CAST 
else exp.TryCast, 717 this=this, 718 to=to, 719 ), 720 TokenType.ARROW: lambda self, this, path: self.expression( 721 exp.JSONExtract, 722 this=this, 723 expression=self.dialect.to_json_path(path), 724 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 725 ), 726 TokenType.DARROW: lambda self, this, path: self.expression( 727 exp.JSONExtractScalar, 728 this=this, 729 expression=self.dialect.to_json_path(path), 730 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 731 ), 732 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 733 exp.JSONBExtract, 734 this=this, 735 expression=path, 736 ), 737 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 738 exp.JSONBExtractScalar, 739 this=this, 740 expression=path, 741 ), 742 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 743 exp.JSONBContains, 744 this=this, 745 expression=key, 746 ), 747 } 748 749 EXPRESSION_PARSERS = { 750 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 751 exp.Column: lambda self: self._parse_column(), 752 exp.Condition: lambda self: self._parse_assignment(), 753 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 754 exp.Expression: lambda self: self._parse_expression(), 755 exp.From: lambda self: self._parse_from(joins=True), 756 exp.Group: lambda self: self._parse_group(), 757 exp.Having: lambda self: self._parse_having(), 758 exp.Identifier: lambda self: self._parse_id_var(), 759 exp.Join: lambda self: self._parse_join(), 760 exp.Lambda: lambda self: self._parse_lambda(), 761 exp.Lateral: lambda self: self._parse_lateral(), 762 exp.Limit: lambda self: self._parse_limit(), 763 exp.Offset: lambda self: self._parse_offset(), 764 exp.Order: lambda self: self._parse_order(), 765 exp.Ordered: lambda self: self._parse_ordered(), 766 exp.Properties: lambda self: self._parse_properties(), 767 exp.Qualify: lambda self: self._parse_qualify(), 768 exp.Returning: lambda self: self._parse_returning(), 769 exp.Select: lambda self: self._parse_select(), 770 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 771 exp.Table: lambda self: self._parse_table_parts(), 772 exp.TableAlias: lambda self: self._parse_table_alias(), 773 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 774 exp.Where: lambda self: self._parse_where(), 775 exp.Window: lambda self: self._parse_named_window(), 776 exp.With: lambda self: self._parse_with(), 777 "JOIN_TYPE": lambda self: self._parse_join_parts(), 778 } 779 780 STATEMENT_PARSERS = { 781 TokenType.ALTER: lambda self: self._parse_alter(), 782 TokenType.BEGIN: lambda self: self._parse_transaction(), 783 TokenType.CACHE: lambda self: self._parse_cache(), 784 TokenType.COMMENT: lambda self: self._parse_comment(), 785 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 786 TokenType.COPY: lambda self: self._parse_copy(), 787 TokenType.CREATE: lambda self: self._parse_create(), 788 TokenType.DELETE: lambda self: self._parse_delete(), 789 TokenType.DESC: lambda self: self._parse_describe(), 790 TokenType.DESCRIBE: lambda self: self._parse_describe(), 791 TokenType.DROP: lambda self: self._parse_drop(), 792 TokenType.GRANT: lambda self: self._parse_grant(), 793 TokenType.INSERT: lambda self: self._parse_insert(), 794 TokenType.KILL: lambda self: self._parse_kill(), 795 TokenType.LOAD: lambda self: self._parse_load(), 796 TokenType.MERGE: lambda self: self._parse_merge(), 797 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 798 TokenType.PRAGMA: lambda 
self: self.expression(exp.Pragma, this=self._parse_expression()), 799 TokenType.REFRESH: lambda self: self._parse_refresh(), 800 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 801 TokenType.SET: lambda self: self._parse_set(), 802 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 803 TokenType.UNCACHE: lambda self: self._parse_uncache(), 804 TokenType.UPDATE: lambda self: self._parse_update(), 805 TokenType.USE: lambda self: self.expression( 806 exp.Use, 807 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 808 this=self._parse_table(schema=False), 809 ), 810 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 811 } 812 813 UNARY_PARSERS = { 814 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 815 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 816 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 817 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 818 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 819 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 820 } 821 822 STRING_PARSERS = { 823 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 824 exp.RawString, this=token.text 825 ), 826 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 827 exp.National, this=token.text 828 ), 829 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 830 TokenType.STRING: lambda self, token: self.expression( 831 exp.Literal, this=token.text, is_string=True 832 ), 833 TokenType.UNICODE_STRING: lambda self, token: self.expression( 834 exp.UnicodeString, 835 this=token.text, 836 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 837 ), 838 } 839 840 NUMERIC_PARSERS = { 841 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 842 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 843 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 844 TokenType.NUMBER: lambda self, token: self.expression( 845 exp.Literal, this=token.text, is_string=False 846 ), 847 } 848 849 PRIMARY_PARSERS = { 850 **STRING_PARSERS, 851 **NUMERIC_PARSERS, 852 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 853 TokenType.NULL: lambda self, _: self.expression(exp.Null), 854 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 855 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 856 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 857 TokenType.STAR: lambda self, _: self._parse_star_ops(), 858 } 859 860 PLACEHOLDER_PARSERS = { 861 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 862 TokenType.PARAMETER: lambda self: self._parse_parameter(), 863 TokenType.COLON: lambda self: ( 864 self.expression(exp.Placeholder, this=self._prev.text) 865 if self._match_set(self.ID_VAR_TOKENS) 866 else None 867 ), 868 } 869 870 RANGE_PARSERS = { 871 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 872 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 873 TokenType.GLOB: binary_range_parser(exp.Glob), 874 TokenType.ILIKE: binary_range_parser(exp.ILike), 875 TokenType.IN: lambda self, this: self._parse_in(this), 876 TokenType.IRLIKE: 
binary_range_parser(exp.RegexpILike), 877 TokenType.IS: lambda self, this: self._parse_is(this), 878 TokenType.LIKE: binary_range_parser(exp.Like), 879 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 880 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 881 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 882 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 883 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 884 } 885 886 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 887 "ALLOWED_VALUES": lambda self: self.expression( 888 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 889 ), 890 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 891 "AUTO": lambda self: self._parse_auto_property(), 892 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 893 "BACKUP": lambda self: self.expression( 894 exp.BackupProperty, this=self._parse_var(any_token=True) 895 ), 896 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 897 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 898 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 899 "CHECKSUM": lambda self: self._parse_checksum(), 900 "CLUSTER BY": lambda self: self._parse_cluster(), 901 "CLUSTERED": lambda self: self._parse_clustered_by(), 902 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 903 exp.CollateProperty, **kwargs 904 ), 905 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 906 "CONTAINS": lambda self: self._parse_contains_property(), 907 "COPY": lambda self: self._parse_copy_property(), 908 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 909 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 910 "DEFINER": lambda self: self._parse_definer(), 911 "DETERMINISTIC": lambda self: self.expression( 912 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 913 ), 914 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 915 "DUPLICATE": lambda self: self._parse_duplicate(), 916 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 917 "DISTKEY": lambda self: self._parse_distkey(), 918 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 919 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 920 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 921 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 922 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 923 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 924 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 925 "FREESPACE": lambda self: self._parse_freespace(), 926 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 927 "HEAP": lambda self: self.expression(exp.HeapProperty), 928 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 929 "IMMUTABLE": lambda self: self.expression( 930 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 931 ), 932 "INHERITS": lambda self: self.expression( 933 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 934 ), 935 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 936 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 937 "LANGUAGE": lambda self: 
self._parse_property_assignment(exp.LanguageProperty), 938 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 939 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 940 "LIKE": lambda self: self._parse_create_like(), 941 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 942 "LOCK": lambda self: self._parse_locking(), 943 "LOCKING": lambda self: self._parse_locking(), 944 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 945 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 946 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 947 "MODIFIES": lambda self: self._parse_modifies_property(), 948 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 949 "NO": lambda self: self._parse_no_property(), 950 "ON": lambda self: self._parse_on_property(), 951 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 952 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 953 "PARTITION": lambda self: self._parse_partitioned_of(), 954 "PARTITION BY": lambda self: self._parse_partitioned_by(), 955 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 956 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 957 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 958 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 959 "READS": lambda self: self._parse_reads_property(), 960 "REMOTE": lambda self: self._parse_remote_with_connection(), 961 "RETURNS": lambda self: self._parse_returns(), 962 "STRICT": lambda self: self.expression(exp.StrictProperty), 963 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 964 "ROW": lambda self: self._parse_row(), 965 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 966 "SAMPLE": lambda self: self.expression( 967 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 968 ), 969 "SECURE": lambda self: self.expression(exp.SecureProperty), 970 "SECURITY": lambda self: self._parse_security(), 971 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 972 "SETTINGS": lambda self: self._parse_settings_property(), 973 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 974 "SORTKEY": lambda self: self._parse_sortkey(), 975 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 976 "STABLE": lambda self: self.expression( 977 exp.StabilityProperty, this=exp.Literal.string("STABLE") 978 ), 979 "STORED": lambda self: self._parse_stored(), 980 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 981 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 982 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 983 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 984 "TO": lambda self: self._parse_to_table(), 985 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 986 "TRANSFORM": lambda self: self.expression( 987 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 988 ), 989 "TTL": lambda self: self._parse_ttl(), 990 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 991 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 992 "VOLATILE": lambda self: self._parse_volatile_property(), 993 "WITH": lambda self: self._parse_with_property(), 994 } 995 996 CONSTRAINT_PARSERS = { 997 "AUTOINCREMENT": lambda 
self: self._parse_auto_increment(), 998 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 999 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1000 "CHARACTER SET": lambda self: self.expression( 1001 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1002 ), 1003 "CHECK": lambda self: self.expression( 1004 exp.CheckColumnConstraint, 1005 this=self._parse_wrapped(self._parse_assignment), 1006 enforced=self._match_text_seq("ENFORCED"), 1007 ), 1008 "COLLATE": lambda self: self.expression( 1009 exp.CollateColumnConstraint, 1010 this=self._parse_identifier() or self._parse_column(), 1011 ), 1012 "COMMENT": lambda self: self.expression( 1013 exp.CommentColumnConstraint, this=self._parse_string() 1014 ), 1015 "COMPRESS": lambda self: self._parse_compress(), 1016 "CLUSTERED": lambda self: self.expression( 1017 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1018 ), 1019 "NONCLUSTERED": lambda self: self.expression( 1020 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1021 ), 1022 "DEFAULT": lambda self: self.expression( 1023 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1024 ), 1025 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1026 "EPHEMERAL": lambda self: self.expression( 1027 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1028 ), 1029 "EXCLUDE": lambda self: self.expression( 1030 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1031 ), 1032 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1033 "FORMAT": lambda self: self.expression( 1034 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1035 ), 1036 "GENERATED": lambda self: self._parse_generated_as_identity(), 1037 "IDENTITY": lambda self: self._parse_auto_increment(), 1038 "INLINE": lambda self: self._parse_inline(), 1039 "LIKE": lambda self: self._parse_create_like(), 1040 "NOT": lambda self: self._parse_not_constraint(), 1041 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1042 "ON": lambda self: ( 1043 self._match(TokenType.UPDATE) 1044 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1045 ) 1046 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1047 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1048 "PERIOD": lambda self: self._parse_period_for_system_time(), 1049 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1050 "REFERENCES": lambda self: self._parse_references(match=False), 1051 "TITLE": lambda self: self.expression( 1052 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1053 ), 1054 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1055 "UNIQUE": lambda self: self._parse_unique(), 1056 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1057 "WITH": lambda self: self.expression( 1058 exp.Properties, expressions=self._parse_wrapped_properties() 1059 ), 1060 } 1061 1062 ALTER_PARSERS = { 1063 "ADD": lambda self: self._parse_alter_table_add(), 1064 "AS": lambda self: self._parse_select(), 1065 "ALTER": lambda self: self._parse_alter_table_alter(), 1066 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1067 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1068 "DROP": lambda self: self._parse_alter_table_drop(), 1069 "RENAME": lambda self: 
self._parse_alter_table_rename(), 1070 "SET": lambda self: self._parse_alter_table_set(), 1071 "SWAP": lambda self: self.expression( 1072 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1073 ), 1074 } 1075 1076 ALTER_ALTER_PARSERS = { 1077 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1078 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1079 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1080 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1081 } 1082 1083 SCHEMA_UNNAMED_CONSTRAINTS = { 1084 "CHECK", 1085 "EXCLUDE", 1086 "FOREIGN KEY", 1087 "LIKE", 1088 "PERIOD", 1089 "PRIMARY KEY", 1090 "UNIQUE", 1091 } 1092 1093 NO_PAREN_FUNCTION_PARSERS = { 1094 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1095 "CASE": lambda self: self._parse_case(), 1096 "CONNECT_BY_ROOT": lambda self: self.expression( 1097 exp.ConnectByRoot, this=self._parse_column() 1098 ), 1099 "IF": lambda self: self._parse_if(), 1100 "NEXT": lambda self: self._parse_next_value_for(), 1101 } 1102 1103 INVALID_FUNC_NAME_TOKENS = { 1104 TokenType.IDENTIFIER, 1105 TokenType.STRING, 1106 } 1107 1108 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1109 1110 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1111 1112 FUNCTION_PARSERS = { 1113 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1114 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1115 "DECODE": lambda self: self._parse_decode(), 1116 "EXTRACT": lambda self: self._parse_extract(), 1117 "GAP_FILL": lambda self: self._parse_gap_fill(), 1118 "JSON_OBJECT": lambda self: self._parse_json_object(), 1119 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1120 "JSON_TABLE": lambda self: self._parse_json_table(), 1121 "MATCH": lambda self: self._parse_match_against(), 1122 "NORMALIZE": lambda self: self._parse_normalize(), 1123 "OPENJSON": lambda self: self._parse_open_json(), 1124 "OVERLAY": lambda self: self._parse_overlay(), 1125 "POSITION": lambda self: self._parse_position(), 1126 "PREDICT": lambda self: self._parse_predict(), 1127 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1128 "STRING_AGG": lambda self: self._parse_string_agg(), 1129 "SUBSTRING": lambda self: self._parse_substring(), 1130 "TRIM": lambda self: self._parse_trim(), 1131 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1132 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1133 } 1134 1135 QUERY_MODIFIER_PARSERS = { 1136 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1137 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1138 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1139 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1140 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1141 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1142 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1143 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1144 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1145 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1146 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1147 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1148 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1149 TokenType.TABLE_SAMPLE: lambda self: ("sample", 
self._parse_table_sample(as_modifier=True)), 1150 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1151 TokenType.CLUSTER_BY: lambda self: ( 1152 "cluster", 1153 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1154 ), 1155 TokenType.DISTRIBUTE_BY: lambda self: ( 1156 "distribute", 1157 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1158 ), 1159 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1160 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1161 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1162 } 1163 1164 SET_PARSERS = { 1165 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1166 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1167 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1168 "TRANSACTION": lambda self: self._parse_set_transaction(), 1169 } 1170 1171 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1172 1173 TYPE_LITERAL_PARSERS = { 1174 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1175 } 1176 1177 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1178 1179 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1180 1181 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1182 1183 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1184 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1185 "ISOLATION": ( 1186 ("LEVEL", "REPEATABLE", "READ"), 1187 ("LEVEL", "READ", "COMMITTED"), 1188 ("LEVEL", "READ", "UNCOMMITTED"), 1189 ("LEVEL", "SERIALIZABLE"), 1190 ), 1191 "READ": ("WRITE", "ONLY"), 1192 } 1193 1194 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1195 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1196 ) 1197 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1198 1199 CREATE_SEQUENCE: OPTIONS_TYPE = { 1200 "SCALE": ("EXTEND", "NOEXTEND"), 1201 "SHARD": ("EXTEND", "NOEXTEND"), 1202 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1203 **dict.fromkeys( 1204 ( 1205 "SESSION", 1206 "GLOBAL", 1207 "KEEP", 1208 "NOKEEP", 1209 "ORDER", 1210 "NOORDER", 1211 "NOCACHE", 1212 "CYCLE", 1213 "NOCYCLE", 1214 "NOMINVALUE", 1215 "NOMAXVALUE", 1216 "NOSCALE", 1217 "NOSHARD", 1218 ), 1219 tuple(), 1220 ), 1221 } 1222 1223 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1224 1225 USABLES: OPTIONS_TYPE = dict.fromkeys( 1226 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1227 ) 1228 1229 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1230 1231 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1232 "TYPE": ("EVOLUTION",), 1233 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1234 } 1235 1236 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1237 1238 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1239 1240 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1241 "NOT": ("ENFORCED",), 1242 "MATCH": ( 1243 "FULL", 1244 "PARTIAL", 1245 "SIMPLE", 1246 ), 1247 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1248 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1249 } 1250 1251 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1252 1253 CLONE_KEYWORDS = {"CLONE", "COPY"} 1254 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1255 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1256 1257
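# The OPTIONS_TYPE tables above pair a leading keyword with the keyword
# sequences that may follow it; _parse_var_from_options walks such a table and
# returns the matched words as a single exp.Var. A minimal sketch of the
# effect, assuming the public sqlglot.parse_one helper that wraps this parser:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")
#
# Here TRANSACTION_CHARACTERISTICS lets the parser consume the words
# ISOLATION LEVEL READ COMMITTED as one characteristic of the SET item.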
OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1258 1259 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1260 1261 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1262 1263 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1264 1265 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1266 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1267 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1268 1269 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1270 1271 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1272 1273 ADD_CONSTRAINT_TOKENS = { 1274 TokenType.CONSTRAINT, 1275 TokenType.FOREIGN_KEY, 1276 TokenType.INDEX, 1277 TokenType.KEY, 1278 TokenType.PRIMARY_KEY, 1279 TokenType.UNIQUE, 1280 } 1281 1282 DISTINCT_TOKENS = {TokenType.DISTINCT} 1283 1284 NULL_TOKENS = {TokenType.NULL} 1285 1286 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1287 1288 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1289 1290 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1291 1292 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1293 1294 ODBC_DATETIME_LITERALS = { 1295 "d": exp.Date, 1296 "t": exp.Time, 1297 "ts": exp.Timestamp, 1298 } 1299 1300 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1301 1302 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1303 1304 # The style options for the DESCRIBE statement 1305 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1306 1307 OPERATION_MODIFIERS: t.Set[str] = set() 1308 1309 STRICT_CAST = True 1310 1311 PREFIXED_PIVOT_COLUMNS = False 1312 IDENTIFY_PIVOT_STRINGS = False 1313 1314 LOG_DEFAULTS_TO_LN = False 1315 1316 # Whether ADD is present for each column added by ALTER TABLE 1317 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1318 1319 # Whether the table sample clause expects CSV syntax 1320 TABLESAMPLE_CSV = False 1321 1322 # The default method used for table sampling 1323 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1324 1325 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1326 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1327 1328 # Whether the TRIM function expects the characters to trim as its first argument 1329 TRIM_PATTERN_FIRST = False 1330 1331 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1332 STRING_ALIASES = False 1333 1334 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1335 MODIFIERS_ATTACHED_TO_SET_OP = True 1336 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1337 1338 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1339 NO_PAREN_IF_COMMANDS = True 1340 1341 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1342 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1343 1344 # Whether the `:` operator is used to extract a value from a VARIANT column 1345 COLON_IS_VARIANT_EXTRACT = False 1346 1347 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1348 # If this is True and '(' is not found, the keyword will be treated as an identifier 1349 VALUES_FOLLOWED_BY_PAREN = True 1350 1351 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1352 SUPPORTS_IMPLICIT_UNNEST = False 1353 1354 # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH 1355 INTERVAL_SPANS = True 1356 1357 # Whether a PARTITION clause can follow a table reference 1358 SUPPORTS_PARTITION_SELECTION = False 1359 1360 __slots__ = ( 1361 "error_level", 1362 "error_message_context", 1363 "max_errors", 1364 "dialect", 1365 "sql", 1366 "errors", 1367 "_tokens", 1368 "_index", 1369 "_curr", 1370 "_next", 1371 "_prev", 1372 "_prev_comments", 1373 ) 1374 1375 # Autofilled 1376 SHOW_TRIE: t.Dict = {} 1377 SET_TRIE: t.Dict = {} 1378 1379 def __init__( 1380 self, 1381 error_level: t.Optional[ErrorLevel] = None, 1382 error_message_context: int = 100, 1383 max_errors: int = 3, 1384 dialect: DialectType = None, 1385 ): 1386 from sqlglot.dialects import Dialect 1387 1388 self.error_level = error_level or ErrorLevel.IMMEDIATE 1389 self.error_message_context = error_message_context 1390 self.max_errors = max_errors 1391 self.dialect = Dialect.get_or_raise(dialect) 1392 self.reset() 1393 1394 def reset(self): 1395 self.sql = "" 1396 self.errors = [] 1397 self._tokens = [] 1398 self._index = 0 1399 self._curr = None 1400 self._next = None 1401 self._prev = None 1402 self._prev_comments = None 1403 1404 def parse( 1405 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1406 ) -> t.List[t.Optional[exp.Expression]]: 1407 """ 1408 Parses a list of tokens and returns a list of syntax trees, one tree 1409 per parsed SQL statement. 1410 1411 Args: 1412 raw_tokens: The list of tokens. 1413 sql: The original SQL string, used to produce helpful debug messages. 1414 1415 Returns: 1416 The list of the produced syntax trees. 1417 """ 1418 return self._parse( 1419 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1420 ) 1421 1422 def parse_into( 1423 self, 1424 expression_types: exp.IntoType, 1425 raw_tokens: t.List[Token], 1426 sql: t.Optional[str] = None, 1427 ) -> t.List[t.Optional[exp.Expression]]: 1428 """ 1429 Parses a list of tokens into a given Expression type. If a collection of Expression 1430 types is given instead, this method will try to parse the token list into each one 1431 of them, stopping at the first for which the parsing succeeds. 1432 1433 Args: 1434 expression_types: The expression type(s) to try and parse the token list into. 1435 raw_tokens: The list of tokens. 1436 sql: The original SQL string, used to produce helpful debug messages. 1437 1438 Returns: 1439 The target Expression.
1440 """ 1441 errors = [] 1442 for expression_type in ensure_list(expression_types): 1443 parser = self.EXPRESSION_PARSERS.get(expression_type) 1444 if not parser: 1445 raise TypeError(f"No parser registered for {expression_type}") 1446 1447 try: 1448 return self._parse(parser, raw_tokens, sql) 1449 except ParseError as e: 1450 e.errors[0]["into_expression"] = expression_type 1451 errors.append(e) 1452 1453 raise ParseError( 1454 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1455 errors=merge_errors(errors), 1456 ) from errors[-1] 1457 1458 def _parse( 1459 self, 1460 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1461 raw_tokens: t.List[Token], 1462 sql: t.Optional[str] = None, 1463 ) -> t.List[t.Optional[exp.Expression]]: 1464 self.reset() 1465 self.sql = sql or "" 1466 1467 total = len(raw_tokens) 1468 chunks: t.List[t.List[Token]] = [[]] 1469 1470 for i, token in enumerate(raw_tokens): 1471 if token.token_type == TokenType.SEMICOLON: 1472 if token.comments: 1473 chunks.append([token]) 1474 1475 if i < total - 1: 1476 chunks.append([]) 1477 else: 1478 chunks[-1].append(token) 1479 1480 expressions = [] 1481 1482 for tokens in chunks: 1483 self._index = -1 1484 self._tokens = tokens 1485 self._advance() 1486 1487 expressions.append(parse_method(self)) 1488 1489 if self._index < len(self._tokens): 1490 self.raise_error("Invalid expression / Unexpected token") 1491 1492 self.check_errors() 1493 1494 return expressions 1495 1496 def check_errors(self) -> None: 1497 """Logs or raises any found errors, depending on the chosen error level setting.""" 1498 if self.error_level == ErrorLevel.WARN: 1499 for error in self.errors: 1500 logger.error(str(error)) 1501 elif self.error_level == ErrorLevel.RAISE and self.errors: 1502 raise ParseError( 1503 concat_messages(self.errors, self.max_errors), 1504 errors=merge_errors(self.errors), 1505 ) 1506 1507 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1508 """ 1509 Appends an error in the list of recorded errors or raises it, depending on the chosen 1510 error level setting. 1511 """ 1512 token = token or self._curr or self._prev or Token.string("") 1513 start = token.start 1514 end = token.end + 1 1515 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1516 highlight = self.sql[start:end] 1517 end_context = self.sql[end : end + self.error_message_context] 1518 1519 error = ParseError.new( 1520 f"{message}. Line {token.line}, Col: {token.col}.\n" 1521 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1522 description=message, 1523 line=token.line, 1524 col=token.col, 1525 start_context=start_context, 1526 highlight=highlight, 1527 end_context=end_context, 1528 ) 1529 1530 if self.error_level == ErrorLevel.IMMEDIATE: 1531 raise error 1532 1533 self.errors.append(error) 1534 1535 def expression( 1536 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1537 ) -> E: 1538 """ 1539 Creates a new, validated Expression. 1540 1541 Args: 1542 exp_class: The expression class to instantiate. 1543 comments: An optional list of comments to attach to the expression. 1544 kwargs: The arguments to set for the expression along with their respective values. 1545 1546 Returns: 1547 The target expression. 
1548 """ 1549 instance = exp_class(**kwargs) 1550 instance.add_comments(comments) if comments else self._add_comments(instance) 1551 return self.validate_expression(instance) 1552 1553 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1554 if expression and self._prev_comments: 1555 expression.add_comments(self._prev_comments) 1556 self._prev_comments = None 1557 1558 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1559 """ 1560 Validates an Expression, making sure that all its mandatory arguments are set. 1561 1562 Args: 1563 expression: The expression to validate. 1564 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1565 1566 Returns: 1567 The validated expression. 1568 """ 1569 if self.error_level != ErrorLevel.IGNORE: 1570 for error_message in expression.error_messages(args): 1571 self.raise_error(error_message) 1572 1573 return expression 1574 1575 def _find_sql(self, start: Token, end: Token) -> str: 1576 return self.sql[start.start : end.end + 1] 1577 1578 def _is_connected(self) -> bool: 1579 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1580 1581 def _advance(self, times: int = 1) -> None: 1582 self._index += times 1583 self._curr = seq_get(self._tokens, self._index) 1584 self._next = seq_get(self._tokens, self._index + 1) 1585 1586 if self._index > 0: 1587 self._prev = self._tokens[self._index - 1] 1588 self._prev_comments = self._prev.comments 1589 else: 1590 self._prev = None 1591 self._prev_comments = None 1592 1593 def _retreat(self, index: int) -> None: 1594 if index != self._index: 1595 self._advance(index - self._index) 1596 1597 def _warn_unsupported(self) -> None: 1598 if len(self._tokens) <= 1: 1599 return 1600 1601 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1602 # interested in emitting a warning for the one being currently processed. 1603 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1604 1605 logger.warning( 1606 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1607 ) 1608 1609 def _parse_command(self) -> exp.Command: 1610 self._warn_unsupported() 1611 return self.expression( 1612 exp.Command, 1613 comments=self._prev_comments, 1614 this=self._prev.text.upper(), 1615 expression=self._parse_string(), 1616 ) 1617 1618 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1619 """ 1620 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1622 solve this by setting & resetting the parser state accordingly. 1623 """ 1624 index = self._index 1625 error_level = self.error_level 1626 1627 self.error_level = ErrorLevel.IMMEDIATE 1628 try: 1629 this = parse_method() 1630 except ParseError: 1631 this = None 1632 finally: 1633 if not this or retreat: 1634 self._retreat(index) 1635 self.error_level = error_level 1636 1637 return this 1638 1639 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1640 start = self._prev 1641 exists = self._parse_exists() if allow_exists else None 1642 1643 self._match(TokenType.ON) 1644 1645 materialized = self._match_text_seq("MATERIALIZED") 1646 kind = self._match_set(self.CREATABLES) and self._prev 1647 if not kind: 1648 return self._parse_as_command(start) 1649 1650 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1651 this = self._parse_user_defined_function(kind=kind.token_type) 1652 elif kind.token_type == TokenType.TABLE: 1653 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1654 elif kind.token_type == TokenType.COLUMN: 1655 this = self._parse_column() 1656 else: 1657 this = self._parse_id_var() 1658 1659 self._match(TokenType.IS) 1660 1661 return self.expression( 1662 exp.Comment, 1663 this=this, 1664 kind=kind.text, 1665 expression=self._parse_string(), 1666 exists=exists, 1667 materialized=materialized, 1668 ) 1669 1670 def _parse_to_table( 1671 self, 1672 ) -> exp.ToTableProperty: 1673 table = self._parse_table_parts(schema=True) 1674 return self.expression(exp.ToTableProperty, this=table) 1675 1676 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1677 def _parse_ttl(self) -> exp.Expression: 1678 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1679 this = self._parse_bitwise() 1680 1681 if self._match_text_seq("DELETE"): 1682 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1683 if self._match_text_seq("RECOMPRESS"): 1684 return self.expression( 1685 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1686 ) 1687 if self._match_text_seq("TO", "DISK"): 1688 return self.expression( 1689 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1690 ) 1691 if self._match_text_seq("TO", "VOLUME"): 1692 return self.expression( 1693 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1694 ) 1695 1696 return this 1697 1698 expressions = self._parse_csv(_parse_ttl_action) 1699 where = self._parse_where() 1700 group = self._parse_group() 1701 1702 aggregates = None 1703 if group and self._match(TokenType.SET): 1704 aggregates = self._parse_csv(self._parse_set_item) 1705 1706 return self.expression( 1707 exp.MergeTreeTTL, 1708 expressions=expressions, 1709 where=where, 1710 group=group, 1711 aggregates=aggregates, 1712 ) 1713 1714 def _parse_statement(self) -> t.Optional[exp.Expression]: 1715 if self._curr is None: 1716 return None 1717 1718 if self._match_set(self.STATEMENT_PARSERS): 1719 comments = self._prev_comments 1720 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1721 stmt.add_comments(comments, prepend=True) 1722 return stmt 1723 1724 if self._match_set(self.dialect.tokenizer.COMMANDS): 1725 return self._parse_command() 1726 1727 expression = self._parse_expression() 1728 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1729 return self._parse_query_modifiers(expression)
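# A usage sketch for the statement-level machinery above, assuming the
# module-level sqlglot.parse and sqlglot.parse_one helpers that wrap
# Parser.parse and Parser.parse_into:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> trees = sqlglot.parse("SELECT 1; SELECT 2")
#     >>> len(trees)  # _parse chunks the tokens on semicolons: one tree per statement
#     2
#     >>> sqlglot.parse_one("SELECT a FROM t", into=exp.Select)  # the parse_into route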
1730 1731 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1732 start = self._prev 1733 temporary = self._match(TokenType.TEMPORARY) 1734 materialized = self._match_text_seq("MATERIALIZED") 1735 1736 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1737 if not kind: 1738 return self._parse_as_command(start) 1739 1740 concurrently = self._match_text_seq("CONCURRENTLY") 1741 if_exists = exists or self._parse_exists() 1742 1743 if kind == "COLUMN": 1744 this = self._parse_column() 1745 else: 1746 this = self._parse_table_parts( 1747 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1748 ) 1749 1750 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1751 1752 if self._match(TokenType.L_PAREN, advance=False): 1753 expressions = self._parse_wrapped_csv(self._parse_types) 1754 else: 1755 expressions = None 1756 1757 return self.expression( 1758 exp.Drop, 1759 exists=if_exists, 1760 this=this, 1761 expressions=expressions, 1762 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1763 temporary=temporary, 1764 materialized=materialized, 1765 cascade=self._match_text_seq("CASCADE"), 1766 constraints=self._match_text_seq("CONSTRAINTS"), 1767 purge=self._match_text_seq("PURGE"), 1768 cluster=cluster, 1769 concurrently=concurrently, 1770 ) 1771 1772 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1773 return ( 1774 self._match_text_seq("IF") 1775 and (not not_ or self._match(TokenType.NOT)) 1776 and self._match(TokenType.EXISTS) 1777 ) 1778 1779 def _parse_create(self) -> exp.Create | exp.Command: 1780 # Note: this can't be None because we've matched a statement parser 1781 start = self._prev 1782 1783 replace = ( 1784 start.token_type == TokenType.REPLACE 1785 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1786 or self._match_pair(TokenType.OR, TokenType.ALTER) 1787 ) 1788 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1789 1790 unique = self._match(TokenType.UNIQUE) 1791 1792 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1793 clustered = True 1794 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1795 "COLUMNSTORE" 1796 ): 1797 clustered = False 1798 else: 1799 clustered = None 1800 1801 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1802 self._advance() 1803 1804 properties = None 1805 create_token = self._match_set(self.CREATABLES) and self._prev 1806 1807 if not create_token: 1808 # exp.Properties.Location.POST_CREATE 1809 properties = self._parse_properties() 1810 create_token = self._match_set(self.CREATABLES) and self._prev 1811 1812 if not properties or not create_token: 1813 return self._parse_as_command(start) 1814 1815 concurrently = self._match_text_seq("CONCURRENTLY") 1816 exists = self._parse_exists(not_=True) 1817 this = None 1818 expression: t.Optional[exp.Expression] = None 1819 indexes = None 1820 no_schema_binding = None 1821 begin = None 1822 end = None 1823 clone = None 1824 1825 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1826 nonlocal properties 1827 if properties and temp_props: 1828 properties.expressions.extend(temp_props.expressions) 1829 elif temp_props: 1830 properties = temp_props 1831 1832 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1833 this = self._parse_user_defined_function(kind=create_token.token_type) 1834 1835 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1836 
extend_props(self._parse_properties()) 1837 1838 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1839 extend_props(self._parse_properties()) 1840 1841 if not expression: 1842 if self._match(TokenType.COMMAND): 1843 expression = self._parse_as_command(self._prev) 1844 else: 1845 begin = self._match(TokenType.BEGIN) 1846 return_ = self._match_text_seq("RETURN") 1847 1848 if self._match(TokenType.STRING, advance=False): 1849 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1850 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1851 expression = self._parse_string() 1852 extend_props(self._parse_properties()) 1853 else: 1854 expression = self._parse_user_defined_function_expression() 1855 1856 end = self._match_text_seq("END") 1857 1858 if return_: 1859 expression = self.expression(exp.Return, this=expression) 1860 elif create_token.token_type == TokenType.INDEX: 1861 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1862 if not self._match(TokenType.ON): 1863 index = self._parse_id_var() 1864 anonymous = False 1865 else: 1866 index = None 1867 anonymous = True 1868 1869 this = self._parse_index(index=index, anonymous=anonymous) 1870 elif create_token.token_type in self.DB_CREATABLES: 1871 table_parts = self._parse_table_parts( 1872 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1873 ) 1874 1875 # exp.Properties.Location.POST_NAME 1876 self._match(TokenType.COMMA) 1877 extend_props(self._parse_properties(before=True)) 1878 1879 this = self._parse_schema(this=table_parts) 1880 1881 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1882 extend_props(self._parse_properties()) 1883 1884 self._match(TokenType.ALIAS) 1885 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1886 # exp.Properties.Location.POST_ALIAS 1887 extend_props(self._parse_properties()) 1888 1889 if create_token.token_type == TokenType.SEQUENCE: 1890 expression = self._parse_types() 1891 extend_props(self._parse_properties()) 1892 else: 1893 expression = self._parse_ddl_select() 1894 1895 if create_token.token_type == TokenType.TABLE: 1896 # exp.Properties.Location.POST_EXPRESSION 1897 extend_props(self._parse_properties()) 1898 1899 indexes = [] 1900 while True: 1901 index = self._parse_index() 1902 1903 # exp.Properties.Location.POST_INDEX 1904 extend_props(self._parse_properties()) 1905 if not index: 1906 break 1907 else: 1908 self._match(TokenType.COMMA) 1909 indexes.append(index) 1910 elif create_token.token_type == TokenType.VIEW: 1911 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1912 no_schema_binding = True 1913 1914 shallow = self._match_text_seq("SHALLOW") 1915 1916 if self._match_texts(self.CLONE_KEYWORDS): 1917 copy = self._prev.text.lower() == "copy" 1918 clone = self.expression( 1919 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1920 ) 1921 1922 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1923 return self._parse_as_command(start) 1924 1925 create_kind_text = create_token.text.upper() 1926 return self.expression( 1927 exp.Create, 1928 this=this, 1929 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1930 replace=replace, 1931 refresh=refresh, 1932 unique=unique, 1933 expression=expression, 1934 exists=exists, 1935 properties=properties, 1936 indexes=indexes, 1937 no_schema_binding=no_schema_binding, 1938
begin=begin, 1939 end=end, 1940 clone=clone, 1941 concurrently=concurrently, 1942 clustered=clustered, 1943 ) 1944 1945 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1946 seq = exp.SequenceProperties() 1947 1948 options = [] 1949 index = self._index 1950 1951 while self._curr: 1952 self._match(TokenType.COMMA) 1953 if self._match_text_seq("INCREMENT"): 1954 self._match_text_seq("BY") 1955 self._match_text_seq("=") 1956 seq.set("increment", self._parse_term()) 1957 elif self._match_text_seq("MINVALUE"): 1958 seq.set("minvalue", self._parse_term()) 1959 elif self._match_text_seq("MAXVALUE"): 1960 seq.set("maxvalue", self._parse_term()) 1961 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1962 self._match_text_seq("=") 1963 seq.set("start", self._parse_term()) 1964 elif self._match_text_seq("CACHE"): 1965 # T-SQL allows empty CACHE which is initialized dynamically 1966 seq.set("cache", self._parse_number() or True) 1967 elif self._match_text_seq("OWNED", "BY"): 1968 # "OWNED BY NONE" is the default 1969 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1970 else: 1971 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1972 if opt: 1973 options.append(opt) 1974 else: 1975 break 1976 1977 seq.set("options", options if options else None) 1978 return None if self._index == index else seq 1979 1980 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1981 # only used for teradata currently 1982 self._match(TokenType.COMMA) 1983 1984 kwargs = { 1985 "no": self._match_text_seq("NO"), 1986 "dual": self._match_text_seq("DUAL"), 1987 "before": self._match_text_seq("BEFORE"), 1988 "default": self._match_text_seq("DEFAULT"), 1989 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1990 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1991 "after": self._match_text_seq("AFTER"), 1992 "minimum": self._match_texts(("MIN", "MINIMUM")), 1993 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1994 } 1995 1996 if self._match_texts(self.PROPERTY_PARSERS): 1997 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1998 try: 1999 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2000 except TypeError: 2001 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2002 2003 return None 2004 2005 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2006 return self._parse_wrapped_csv(self._parse_property) 2007 2008 def _parse_property(self) -> t.Optional[exp.Expression]: 2009 if self._match_texts(self.PROPERTY_PARSERS): 2010 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2011 2012 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2013 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2014 2015 if self._match_text_seq("COMPOUND", "SORTKEY"): 2016 return self._parse_sortkey(compound=True) 2017 2018 if self._match_text_seq("SQL", "SECURITY"): 2019 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2020 2021 index = self._index 2022 key = self._parse_column() 2023 2024 if not self._match(TokenType.EQ): 2025 self._retreat(index) 2026 return self._parse_sequence_properties() 2027 2028 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2029 if isinstance(key, exp.Column): 2030 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2031 2032 value = self._parse_bitwise() or 
self._parse_var(any_token=True) 2033 2034 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2035 if isinstance(value, exp.Column): 2036 value = exp.var(value.name) 2037 2038 return self.expression(exp.Property, this=key, value=value) 2039 2040 def _parse_stored(self) -> exp.FileFormatProperty: 2041 self._match(TokenType.ALIAS) 2042 2043 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2044 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2045 2046 return self.expression( 2047 exp.FileFormatProperty, 2048 this=( 2049 self.expression( 2050 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2051 ) 2052 if input_format or output_format 2053 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2054 ), 2055 ) 2056 2057 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2058 field = self._parse_field() 2059 if isinstance(field, exp.Identifier) and not field.quoted: 2060 field = exp.var(field) 2061 2062 return field 2063 2064 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2065 self._match(TokenType.EQ) 2066 self._match(TokenType.ALIAS) 2067 2068 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2069 2070 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2071 properties = [] 2072 while True: 2073 if before: 2074 prop = self._parse_property_before() 2075 else: 2076 prop = self._parse_property() 2077 if not prop: 2078 break 2079 for p in ensure_list(prop): 2080 properties.append(p) 2081 2082 if properties: 2083 return self.expression(exp.Properties, expressions=properties) 2084 2085 return None 2086 2087 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2088 return self.expression( 2089 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2090 ) 2091 2092 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2093 if self._match_texts(("DEFINER", "INVOKER")): 2094 security_specifier = self._prev.text.upper() 2095 return self.expression(exp.SecurityProperty, this=security_specifier) 2096 return None 2097 2098 def _parse_settings_property(self) -> exp.SettingsProperty: 2099 return self.expression( 2100 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2101 ) 2102 2103 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2104 if self._index >= 2: 2105 pre_volatile_token = self._tokens[self._index - 2] 2106 else: 2107 pre_volatile_token = None 2108 2109 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2110 return exp.VolatileProperty() 2111 2112 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2113 2114 def _parse_retention_period(self) -> exp.Var: 2115 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2116 number = self._parse_number() 2117 number_str = f"{number} " if number else "" 2118 unit = self._parse_var(any_token=True) 2119 return exp.var(f"{number_str}{unit}") 2120 2121 def _parse_system_versioning_property( 2122 self, with_: bool = False 2123 ) -> exp.WithSystemVersioningProperty: 2124 self._match(TokenType.EQ) 2125 prop = self.expression( 2126 exp.WithSystemVersioningProperty, 2127 **{ # type: ignore 2128 "on": True, 2129 "with": with_, 2130 }, 2131 ) 2132 2133 if self._match_text_seq("OFF"): 2134 prop.set("on", 
False) 2135 return prop 2136 2137 self._match(TokenType.ON) 2138 if self._match(TokenType.L_PAREN): 2139 while self._curr and not self._match(TokenType.R_PAREN): 2140 if self._match_text_seq("HISTORY_TABLE", "="): 2141 prop.set("this", self._parse_table_parts()) 2142 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2143 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2144 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2145 prop.set("retention_period", self._parse_retention_period()) 2146 2147 self._match(TokenType.COMMA) 2148 2149 return prop 2150 2151 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2152 self._match(TokenType.EQ) 2153 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2154 prop = self.expression(exp.DataDeletionProperty, on=on) 2155 2156 if self._match(TokenType.L_PAREN): 2157 while self._curr and not self._match(TokenType.R_PAREN): 2158 if self._match_text_seq("FILTER_COLUMN", "="): 2159 prop.set("filter_column", self._parse_column()) 2160 elif self._match_text_seq("RETENTION_PERIOD", "="): 2161 prop.set("retention_period", self._parse_retention_period()) 2162 2163 self._match(TokenType.COMMA) 2164 2165 return prop 2166 2167 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2168 kind = "HASH" 2169 expressions: t.Optional[t.List[exp.Expression]] = None 2170 if self._match_text_seq("BY", "HASH"): 2171 expressions = self._parse_wrapped_csv(self._parse_id_var) 2172 elif self._match_text_seq("BY", "RANDOM"): 2173 kind = "RANDOM" 2174 2175 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2176 buckets: t.Optional[exp.Expression] = None 2177 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2178 buckets = self._parse_number() 2179 2180 return self.expression( 2181 exp.DistributedByProperty, 2182 expressions=expressions, 2183 kind=kind, 2184 buckets=buckets, 2185 order=self._parse_order(), 2186 ) 2187 2188 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2189 self._match_text_seq("KEY") 2190 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2191 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2192 2193 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2194 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2195 prop = self._parse_system_versioning_property(with_=True) 2196 self._match_r_paren() 2197 return prop 2198 2199 if self._match(TokenType.L_PAREN, advance=False): 2200 return self._parse_wrapped_properties() 2201 2202 if self._match_text_seq("JOURNAL"): 2203 return self._parse_withjournaltable() 2204 2205 if self._match_texts(self.VIEW_ATTRIBUTES): 2206 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2207 2208 if self._match_text_seq("DATA"): 2209 return self._parse_withdata(no=False) 2210 elif self._match_text_seq("NO", "DATA"): 2211 return self._parse_withdata(no=True) 2212 2213 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2214 return self._parse_serde_properties(with_=True) 2215 2216 if self._match(TokenType.SCHEMA): 2217 return self.expression( 2218 exp.WithSchemaBindingProperty, 2219 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2220 ) 2221 2222 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2223 return self.expression( 2224 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2225 ) 2226 2227 if not self._next: 2228 
return None 2229 2230 return self._parse_withisolatedloading() 2231 2232 def _parse_procedure_option(self) -> exp.Expression | None: 2233 if self._match_text_seq("EXECUTE", "AS"): 2234 return self.expression( 2235 exp.ExecuteAsProperty, 2236 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2237 or self._parse_string(), 2238 ) 2239 2240 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2241 2242 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2243 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2244 self._match(TokenType.EQ) 2245 2246 user = self._parse_id_var() 2247 self._match(TokenType.PARAMETER) 2248 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2249 2250 if not user or not host: 2251 return None 2252 2253 return exp.DefinerProperty(this=f"{user}@{host}") 2254 2255 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2256 self._match(TokenType.TABLE) 2257 self._match(TokenType.EQ) 2258 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2259 2260 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2261 return self.expression(exp.LogProperty, no=no) 2262 2263 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2264 return self.expression(exp.JournalProperty, **kwargs) 2265 2266 def _parse_checksum(self) -> exp.ChecksumProperty: 2267 self._match(TokenType.EQ) 2268 2269 on = None 2270 if self._match(TokenType.ON): 2271 on = True 2272 elif self._match_text_seq("OFF"): 2273 on = False 2274 2275 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2276 2277 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2278 return self.expression( 2279 exp.Cluster, 2280 expressions=( 2281 self._parse_wrapped_csv(self._parse_ordered) 2282 if wrapped 2283 else self._parse_csv(self._parse_ordered) 2284 ), 2285 ) 2286 2287 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2288 self._match_text_seq("BY") 2289 2290 self._match_l_paren() 2291 expressions = self._parse_csv(self._parse_column) 2292 self._match_r_paren() 2293 2294 if self._match_text_seq("SORTED", "BY"): 2295 self._match_l_paren() 2296 sorted_by = self._parse_csv(self._parse_ordered) 2297 self._match_r_paren() 2298 else: 2299 sorted_by = None 2300 2301 self._match(TokenType.INTO) 2302 buckets = self._parse_number() 2303 self._match_text_seq("BUCKETS") 2304 2305 return self.expression( 2306 exp.ClusteredByProperty, 2307 expressions=expressions, 2308 sorted_by=sorted_by, 2309 buckets=buckets, 2310 ) 2311 2312 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2313 if not self._match_text_seq("GRANTS"): 2314 self._retreat(self._index - 1) 2315 return None 2316 2317 return self.expression(exp.CopyGrantsProperty) 2318 2319 def _parse_freespace(self) -> exp.FreespaceProperty: 2320 self._match(TokenType.EQ) 2321 return self.expression( 2322 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2323 ) 2324 2325 def _parse_mergeblockratio( 2326 self, no: bool = False, default: bool = False 2327 ) -> exp.MergeBlockRatioProperty: 2328 if self._match(TokenType.EQ): 2329 return self.expression( 2330 exp.MergeBlockRatioProperty, 2331 this=self._parse_number(), 2332 percent=self._match(TokenType.PERCENT), 2333 ) 2334 2335 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2336 2337 def _parse_datablocksize( 2338 self, 2339 default: t.Optional[bool] = None, 2340 
minimum: t.Optional[bool] = None, 2341 maximum: t.Optional[bool] = None, 2342 ) -> exp.DataBlocksizeProperty: 2343 self._match(TokenType.EQ) 2344 size = self._parse_number() 2345 2346 units = None 2347 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2348 units = self._prev.text 2349 2350 return self.expression( 2351 exp.DataBlocksizeProperty, 2352 size=size, 2353 units=units, 2354 default=default, 2355 minimum=minimum, 2356 maximum=maximum, 2357 ) 2358 2359 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2360 self._match(TokenType.EQ) 2361 always = self._match_text_seq("ALWAYS") 2362 manual = self._match_text_seq("MANUAL") 2363 never = self._match_text_seq("NEVER") 2364 default = self._match_text_seq("DEFAULT") 2365 2366 autotemp = None 2367 if self._match_text_seq("AUTOTEMP"): 2368 autotemp = self._parse_schema() 2369 2370 return self.expression( 2371 exp.BlockCompressionProperty, 2372 always=always, 2373 manual=manual, 2374 never=never, 2375 default=default, 2376 autotemp=autotemp, 2377 ) 2378 2379 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2380 index = self._index 2381 no = self._match_text_seq("NO") 2382 concurrent = self._match_text_seq("CONCURRENT") 2383 2384 if not self._match_text_seq("ISOLATED", "LOADING"): 2385 self._retreat(index) 2386 return None 2387 2388 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2389 return self.expression( 2390 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2391 ) 2392 2393 def _parse_locking(self) -> exp.LockingProperty: 2394 if self._match(TokenType.TABLE): 2395 kind = "TABLE" 2396 elif self._match(TokenType.VIEW): 2397 kind = "VIEW" 2398 elif self._match(TokenType.ROW): 2399 kind = "ROW" 2400 elif self._match_text_seq("DATABASE"): 2401 kind = "DATABASE" 2402 else: 2403 kind = None 2404 2405 if kind in ("DATABASE", "TABLE", "VIEW"): 2406 this = self._parse_table_parts() 2407 else: 2408 this = None 2409 2410 if self._match(TokenType.FOR): 2411 for_or_in = "FOR" 2412 elif self._match(TokenType.IN): 2413 for_or_in = "IN" 2414 else: 2415 for_or_in = None 2416 2417 if self._match_text_seq("ACCESS"): 2418 lock_type = "ACCESS" 2419 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2420 lock_type = "EXCLUSIVE" 2421 elif self._match_text_seq("SHARE"): 2422 lock_type = "SHARE" 2423 elif self._match_text_seq("READ"): 2424 lock_type = "READ" 2425 elif self._match_text_seq("WRITE"): 2426 lock_type = "WRITE" 2427 elif self._match_text_seq("CHECKSUM"): 2428 lock_type = "CHECKSUM" 2429 else: 2430 lock_type = None 2431 2432 override = self._match_text_seq("OVERRIDE") 2433 2434 return self.expression( 2435 exp.LockingProperty, 2436 this=this, 2437 kind=kind, 2438 for_or_in=for_or_in, 2439 lock_type=lock_type, 2440 override=override, 2441 ) 2442 2443 def _parse_partition_by(self) -> t.List[exp.Expression]: 2444 if self._match(TokenType.PARTITION_BY): 2445 return self._parse_csv(self._parse_assignment) 2446 return [] 2447 2448 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2449 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2450 if self._match_text_seq("MINVALUE"): 2451 return exp.var("MINVALUE") 2452 if self._match_text_seq("MAXVALUE"): 2453 return exp.var("MAXVALUE") 2454 return self._parse_bitwise() 2455 2456 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2457 expression = None 2458 from_expressions = None 2459 to_expressions = None 2460 2461 if self._match(TokenType.IN): 2462 this = 
self._parse_wrapped_csv(self._parse_bitwise) 2463 elif self._match(TokenType.FROM): 2464 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2465 self._match_text_seq("TO") 2466 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2467 elif self._match_text_seq("WITH", "(", "MODULUS"): 2468 this = self._parse_number() 2469 self._match_text_seq(",", "REMAINDER") 2470 expression = self._parse_number() 2471 self._match_r_paren() 2472 else: 2473 self.raise_error("Failed to parse partition bound spec.") 2474 2475 return self.expression( 2476 exp.PartitionBoundSpec, 2477 this=this, 2478 expression=expression, 2479 from_expressions=from_expressions, 2480 to_expressions=to_expressions, 2481 ) 2482 2483 # https://www.postgresql.org/docs/current/sql-createtable.html 2484 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2485 if not self._match_text_seq("OF"): 2486 self._retreat(self._index - 1) 2487 return None 2488 2489 this = self._parse_table(schema=True) 2490 2491 if self._match(TokenType.DEFAULT): 2492 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2493 elif self._match_text_seq("FOR", "VALUES"): 2494 expression = self._parse_partition_bound_spec() 2495 else: 2496 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2497 2498 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2499 2500 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2501 self._match(TokenType.EQ) 2502 return self.expression( 2503 exp.PartitionedByProperty, 2504 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2505 ) 2506 2507 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2508 if self._match_text_seq("AND", "STATISTICS"): 2509 statistics = True 2510 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2511 statistics = False 2512 else: 2513 statistics = None 2514 2515 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2516 2517 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2518 if self._match_text_seq("SQL"): 2519 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2520 return None 2521 2522 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2523 if self._match_text_seq("SQL", "DATA"): 2524 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2525 return None 2526 2527 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2528 if self._match_text_seq("PRIMARY", "INDEX"): 2529 return exp.NoPrimaryIndexProperty() 2530 if self._match_text_seq("SQL"): 2531 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2532 return None 2533 2534 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2535 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2536 return exp.OnCommitProperty() 2537 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2538 return exp.OnCommitProperty(delete=True) 2539 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2540 2541 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2542 if self._match_text_seq("SQL", "DATA"): 2543 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2544 return None 2545 2546 def _parse_distkey(self) -> exp.DistKeyProperty: 2547 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2548 2549 def _parse_create_like(self) -> 
t.Optional[exp.LikeProperty]: 2550 table = self._parse_table(schema=True) 2551 2552 options = [] 2553 while self._match_texts(("INCLUDING", "EXCLUDING")): 2554 this = self._prev.text.upper() 2555 2556 id_var = self._parse_id_var() 2557 if not id_var: 2558 return None 2559 2560 options.append( 2561 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2562 ) 2563 2564 return self.expression(exp.LikeProperty, this=table, expressions=options) 2565 2566 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2567 return self.expression( 2568 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2569 ) 2570 2571 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2572 self._match(TokenType.EQ) 2573 return self.expression( 2574 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2575 ) 2576 2577 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2578 self._match_text_seq("WITH", "CONNECTION") 2579 return self.expression( 2580 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2581 ) 2582 2583 def _parse_returns(self) -> exp.ReturnsProperty: 2584 value: t.Optional[exp.Expression] 2585 null = None 2586 is_table = self._match(TokenType.TABLE) 2587 2588 if is_table: 2589 if self._match(TokenType.LT): 2590 value = self.expression( 2591 exp.Schema, 2592 this="TABLE", 2593 expressions=self._parse_csv(self._parse_struct_types), 2594 ) 2595 if not self._match(TokenType.GT): 2596 self.raise_error("Expecting >") 2597 else: 2598 value = self._parse_schema(exp.var("TABLE")) 2599 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2600 null = True 2601 value = None 2602 else: 2603 value = self._parse_types() 2604 2605 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2606 2607 def _parse_describe(self) -> exp.Describe: 2608 kind = self._match_set(self.CREATABLES) and self._prev.text 2609 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2610 if self._match(TokenType.DOT): 2611 style = None 2612 self._retreat(self._index - 2) 2613 this = self._parse_table(schema=True) 2614 properties = self._parse_properties() 2615 expressions = properties.expressions if properties else None 2616 partition = self._parse_partition() 2617 return self.expression( 2618 exp.Describe, 2619 this=this, 2620 style=style, 2621 kind=kind, 2622 expressions=expressions, 2623 partition=partition, 2624 ) 2625 2626 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2627 kind = self._prev.text.upper() 2628 expressions = [] 2629 2630 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2631 if self._match(TokenType.WHEN): 2632 expression = self._parse_disjunction() 2633 self._match(TokenType.THEN) 2634 else: 2635 expression = None 2636 2637 else_ = self._match(TokenType.ELSE) 2638 2639 if not self._match(TokenType.INTO): 2640 return None 2641 2642 return self.expression( 2643 exp.ConditionalInsert, 2644 this=self.expression( 2645 exp.Insert, 2646 this=self._parse_table(schema=True), 2647 expression=self._parse_derived_table_values(), 2648 ), 2649 expression=expression, 2650 else_=else_, 2651 ) 2652 2653 expression = parse_conditional_insert() 2654 while expression is not None: 2655 expressions.append(expression) 2656 expression = parse_conditional_insert() 2657 2658 return self.expression( 2659 exp.MultitableInserts, 2660 kind=kind, 2661 
comments=comments, 2662 expressions=expressions, 2663 source=self._parse_table(), 2664 ) 2665 2666 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2667 comments = [] 2668 hint = self._parse_hint() 2669 overwrite = self._match(TokenType.OVERWRITE) 2670 ignore = self._match(TokenType.IGNORE) 2671 local = self._match_text_seq("LOCAL") 2672 alternative = None 2673 is_function = None 2674 2675 if self._match_text_seq("DIRECTORY"): 2676 this: t.Optional[exp.Expression] = self.expression( 2677 exp.Directory, 2678 this=self._parse_var_or_string(), 2679 local=local, 2680 row_format=self._parse_row_format(match_row=True), 2681 ) 2682 else: 2683 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2684 comments += ensure_list(self._prev_comments) 2685 return self._parse_multitable_inserts(comments) 2686 2687 if self._match(TokenType.OR): 2688 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2689 2690 self._match(TokenType.INTO) 2691 comments += ensure_list(self._prev_comments) 2692 self._match(TokenType.TABLE) 2693 is_function = self._match(TokenType.FUNCTION) 2694 2695 this = ( 2696 self._parse_table(schema=True, parse_partition=True) 2697 if not is_function 2698 else self._parse_function() 2699 ) 2700 2701 returning = self._parse_returning() 2702 2703 return self.expression( 2704 exp.Insert, 2705 comments=comments, 2706 hint=hint, 2707 is_function=is_function, 2708 this=this, 2709 stored=self._match_text_seq("STORED") and self._parse_stored(), 2710 by_name=self._match_text_seq("BY", "NAME"), 2711 exists=self._parse_exists(), 2712 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2713 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2714 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2715 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2716 conflict=self._parse_on_conflict(), 2717 returning=returning or self._parse_returning(), 2718 overwrite=overwrite, 2719 alternative=alternative, 2720 ignore=ignore, 2721 source=self._match(TokenType.TABLE) and self._parse_table(), 2722 ) 2723 2724 def _parse_kill(self) -> exp.Kill: 2725 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2726 2727 return self.expression( 2728 exp.Kill, 2729 this=self._parse_primary(), 2730 kind=kind, 2731 ) 2732 2733 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2734 conflict = self._match_text_seq("ON", "CONFLICT") 2735 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2736 2737 if not conflict and not duplicate: 2738 return None 2739 2740 conflict_keys = None 2741 constraint = None 2742 2743 if conflict: 2744 if self._match_text_seq("ON", "CONSTRAINT"): 2745 constraint = self._parse_id_var() 2746 elif self._match(TokenType.L_PAREN): 2747 conflict_keys = self._parse_csv(self._parse_id_var) 2748 self._match_r_paren() 2749 2750 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2751 if self._prev.token_type == TokenType.UPDATE: 2752 self._match(TokenType.SET) 2753 expressions = self._parse_csv(self._parse_equality) 2754 else: 2755 expressions = None 2756 2757 return self.expression( 2758 exp.OnConflict, 2759 duplicate=duplicate, 2760 expressions=expressions, 2761 action=action, 2762 conflict_keys=conflict_keys, 2763 constraint=constraint, 2764 ) 2765 2766 def _parse_returning(self) -> t.Optional[exp.Returning]: 2767 if not self._match(TokenType.RETURNING): 2768 return None 2769 return 
self.expression( 2770 exp.Returning, 2771 expressions=self._parse_csv(self._parse_expression), 2772 into=self._match(TokenType.INTO) and self._parse_table_part(), 2773 ) 2774 2775 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2776 if not self._match(TokenType.FORMAT): 2777 return None 2778 return self._parse_row_format() 2779 2780 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2781 index = self._index 2782 with_ = with_ or self._match_text_seq("WITH") 2783 2784 if not self._match(TokenType.SERDE_PROPERTIES): 2785 self._retreat(index) 2786 return None 2787 return self.expression( 2788 exp.SerdeProperties, 2789 **{ # type: ignore 2790 "expressions": self._parse_wrapped_properties(), 2791 "with": with_, 2792 }, 2793 ) 2794 2795 def _parse_row_format( 2796 self, match_row: bool = False 2797 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2798 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2799 return None 2800 2801 if self._match_text_seq("SERDE"): 2802 this = self._parse_string() 2803 2804 serde_properties = self._parse_serde_properties() 2805 2806 return self.expression( 2807 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2808 ) 2809 2810 self._match_text_seq("DELIMITED") 2811 2812 kwargs = {} 2813 2814 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2815 kwargs["fields"] = self._parse_string() 2816 if self._match_text_seq("ESCAPED", "BY"): 2817 kwargs["escaped"] = self._parse_string() 2818 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2819 kwargs["collection_items"] = self._parse_string() 2820 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2821 kwargs["map_keys"] = self._parse_string() 2822 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2823 kwargs["lines"] = self._parse_string() 2824 if self._match_text_seq("NULL", "DEFINED", "AS"): 2825 kwargs["null"] = self._parse_string() 2826 2827 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2828 2829 def _parse_load(self) -> exp.LoadData | exp.Command: 2830 if self._match_text_seq("DATA"): 2831 local = self._match_text_seq("LOCAL") 2832 self._match_text_seq("INPATH") 2833 inpath = self._parse_string() 2834 overwrite = self._match(TokenType.OVERWRITE) 2835 self._match_pair(TokenType.INTO, TokenType.TABLE) 2836 2837 return self.expression( 2838 exp.LoadData, 2839 this=self._parse_table(schema=True), 2840 local=local, 2841 overwrite=overwrite, 2842 inpath=inpath, 2843 partition=self._parse_partition(), 2844 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2845 serde=self._match_text_seq("SERDE") and self._parse_string(), 2846 ) 2847 return self._parse_as_command(self._prev) 2848 2849 def _parse_delete(self) -> exp.Delete: 2850 # This handles MySQL's "Multiple-Table Syntax" 2851 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2852 tables = None 2853 if not self._match(TokenType.FROM, advance=False): 2854 tables = self._parse_csv(self._parse_table) or None 2855 2856 returning = self._parse_returning() 2857 2858 return self.expression( 2859 exp.Delete, 2860 tables=tables, 2861 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2862 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2863 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2864 where=self._parse_where(), 2865 returning=returning or self._parse_returning(), 
2866 limit=self._parse_limit(), 2867 ) 2868 2869 def _parse_update(self) -> exp.Update: 2870 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2871 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2872 returning = self._parse_returning() 2873 return self.expression( 2874 exp.Update, 2875 **{ # type: ignore 2876 "this": this, 2877 "expressions": expressions, 2878 "from": self._parse_from(joins=True), 2879 "where": self._parse_where(), 2880 "returning": returning or self._parse_returning(), 2881 "order": self._parse_order(), 2882 "limit": self._parse_limit(), 2883 }, 2884 ) 2885 2886 def _parse_uncache(self) -> exp.Uncache: 2887 if not self._match(TokenType.TABLE): 2888 self.raise_error("Expecting TABLE after UNCACHE") 2889 2890 return self.expression( 2891 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2892 ) 2893 2894 def _parse_cache(self) -> exp.Cache: 2895 lazy = self._match_text_seq("LAZY") 2896 self._match(TokenType.TABLE) 2897 table = self._parse_table(schema=True) 2898 2899 options = [] 2900 if self._match_text_seq("OPTIONS"): 2901 self._match_l_paren() 2902 k = self._parse_string() 2903 self._match(TokenType.EQ) 2904 v = self._parse_string() 2905 options = [k, v] 2906 self._match_r_paren() 2907 2908 self._match(TokenType.ALIAS) 2909 return self.expression( 2910 exp.Cache, 2911 this=table, 2912 lazy=lazy, 2913 options=options, 2914 expression=self._parse_select(nested=True), 2915 ) 2916 2917 def _parse_partition(self) -> t.Optional[exp.Partition]: 2918 if not self._match(TokenType.PARTITION): 2919 return None 2920 2921 return self.expression( 2922 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2923 ) 2924 2925 def _parse_value(self) -> t.Optional[exp.Tuple]: 2926 if self._match(TokenType.L_PAREN): 2927 expressions = self._parse_csv(self._parse_expression) 2928 self._match_r_paren() 2929 return self.expression(exp.Tuple, expressions=expressions) 2930 2931 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
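# --- Illustrative example (editor's addition, not part of the sqlglot source). ---
# A minimal sketch of the fallback described above, assuming sqlglot's public
# parse_one API: every VALUES row, parenthesized or bare, ends up as an
# exp.Tuple, so downstream code can treat both shapes uniformly.
#
#     import sqlglot
#
#     ast = sqlglot.parse_one("INSERT INTO t VALUES (1, 'a'), (2, 'b')")
#     rows = ast.expression.expressions  # two exp.Tuple nodes, one per row
# ----------------------------------------------------------------------------------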
2932 expression = self._parse_expression() 2933 if expression: 2934 return self.expression(exp.Tuple, expressions=[expression]) 2935 return None 2936 2937 def _parse_projections(self) -> t.List[exp.Expression]: 2938 return self._parse_expressions() 2939 2940 def _parse_select( 2941 self, 2942 nested: bool = False, 2943 table: bool = False, 2944 parse_subquery_alias: bool = True, 2945 parse_set_operation: bool = True, 2946 ) -> t.Optional[exp.Expression]: 2947 cte = self._parse_with() 2948 2949 if cte: 2950 this = self._parse_statement() 2951 2952 if not this: 2953 self.raise_error("Failed to parse any statement following CTE") 2954 return cte 2955 2956 if "with" in this.arg_types: 2957 this.set("with", cte) 2958 else: 2959 self.raise_error(f"{this.key} does not support CTE") 2960 this = cte 2961 2962 return this 2963 2964 # duckdb supports leading with FROM x 2965 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2966 2967 if self._match(TokenType.SELECT): 2968 comments = self._prev_comments 2969 2970 hint = self._parse_hint() 2971 2972 if self._next and not self._next.token_type == TokenType.DOT: 2973 all_ = self._match(TokenType.ALL) 2974 distinct = self._match_set(self.DISTINCT_TOKENS) 2975 else: 2976 all_, distinct = None, None 2977 2978 kind = ( 2979 self._match(TokenType.ALIAS) 2980 and self._match_texts(("STRUCT", "VALUE")) 2981 and self._prev.text.upper() 2982 ) 2983 2984 if distinct: 2985 distinct = self.expression( 2986 exp.Distinct, 2987 on=self._parse_value() if self._match(TokenType.ON) else None, 2988 ) 2989 2990 if all_ and distinct: 2991 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2992 2993 operation_modifiers = [] 2994 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 2995 operation_modifiers.append(exp.var(self._prev.text.upper())) 2996 2997 limit = self._parse_limit(top=True) 2998 projections = self._parse_projections() 2999 3000 this = self.expression( 3001 exp.Select, 3002 kind=kind, 3003 hint=hint, 3004 distinct=distinct, 3005 expressions=projections, 3006 limit=limit, 3007 operation_modifiers=operation_modifiers or None, 3008 ) 3009 this.comments = comments 3010 3011 into = self._parse_into() 3012 if into: 3013 this.set("into", into) 3014 3015 if not from_: 3016 from_ = self._parse_from() 3017 3018 if from_: 3019 this.set("from", from_) 3020 3021 this = self._parse_query_modifiers(this) 3022 elif (table or nested) and self._match(TokenType.L_PAREN): 3023 if self._match(TokenType.PIVOT): 3024 this = self._parse_simplified_pivot() 3025 elif self._match(TokenType.FROM): 3026 this = exp.select("*").from_( 3027 t.cast(exp.From, self._parse_from(skip_from_token=True)) 3028 ) 3029 else: 3030 this = ( 3031 self._parse_table() 3032 if table 3033 else self._parse_select(nested=True, parse_set_operation=False) 3034 ) 3035 3036 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3037 # in case a modifier (e.g. 
join) is following 3038 if table and isinstance(this, exp.Values) and this.alias: 3039 alias = this.args["alias"].pop() 3040 this = exp.Table(this=this, alias=alias) 3041 3042 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3043 3044 self._match_r_paren() 3045 3046 # We return early here so that the UNION isn't attached to the subquery by the 3047 # following call to _parse_set_operations, but instead becomes the parent node 3048 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3049 elif self._match(TokenType.VALUES, advance=False): 3050 this = self._parse_derived_table_values() 3051 elif from_: 3052 this = exp.select("*").from_(from_.this, copy=False) 3053 elif self._match(TokenType.SUMMARIZE): 3054 table = self._match(TokenType.TABLE) 3055 this = self._parse_select() or self._parse_string() or self._parse_table() 3056 return self.expression(exp.Summarize, this=this, table=table) 3057 elif self._match(TokenType.DESCRIBE): 3058 this = self._parse_describe() 3059 elif self._match_text_seq("STREAM"): 3060 this = self._parse_function() 3061 if this: 3062 this = self.expression(exp.Stream, this=this) 3063 else: 3064 self._retreat(self._index - 1) 3065 else: 3066 this = None 3067 3068 return self._parse_set_operations(this) if parse_set_operation else this 3069 3070 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3071 if not skip_with_token and not self._match(TokenType.WITH): 3072 return None 3073 3074 comments = self._prev_comments 3075 recursive = self._match(TokenType.RECURSIVE) 3076 3077 last_comments = None 3078 expressions = [] 3079 while True: 3080 expressions.append(self._parse_cte()) 3081 if last_comments: 3082 expressions[-1].add_comments(last_comments) 3083 3084 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3085 break 3086 else: 3087 self._match(TokenType.WITH) 3088 3089 last_comments = self._prev_comments 3090 3091 return self.expression( 3092 exp.With, comments=comments, expressions=expressions, recursive=recursive 3093 ) 3094 3095 def _parse_cte(self) -> exp.CTE: 3096 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3097 if not alias or not alias.this: 3098 self.raise_error("Expected CTE to have alias") 3099 3100 self._match(TokenType.ALIAS) 3101 comments = self._prev_comments 3102 3103 if self._match_text_seq("NOT", "MATERIALIZED"): 3104 materialized = False 3105 elif self._match_text_seq("MATERIALIZED"): 3106 materialized = True 3107 else: 3108 materialized = None 3109 3110 return self.expression( 3111 exp.CTE, 3112 this=self._parse_wrapped(self._parse_statement), 3113 alias=alias, 3114 materialized=materialized, 3115 comments=comments, 3116 ) 3117 3118 def _parse_table_alias( 3119 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3120 ) -> t.Optional[exp.TableAlias]: 3121 any_token = self._match(TokenType.ALIAS) 3122 alias = ( 3123 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3124 or self._parse_string_as_identifier() 3125 ) 3126 3127 index = self._index 3128 if self._match(TokenType.L_PAREN): 3129 columns = self._parse_csv(self._parse_function_parameter) 3130 self._match_r_paren() if columns else self._retreat(index) 3131 else: 3132 columns = None 3133 3134 if not alias and not columns: 3135 return None 3136 3137 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3138 3139 # We bubble up comments from the Identifier to the TableAlias 3140 if isinstance(alias, exp.Identifier): 3141 
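# --- Illustrative example (editor's addition, not part of the sqlglot source). ---
# A quick sketch of what _parse_with/_parse_cte (above) produce, assuming
# sqlglot's public API: the WITH clause becomes an exp.With attached to the
# statement's "with" arg, holding one exp.CTE per common table expression.
#
#     import sqlglot
#
#     ast = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
#     cte = ast.args["with"].expressions[0]  # exp.CTE aliased as "x"
#     assert cte.alias == "x"
# ----------------------------------------------------------------------------------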
table_alias.add_comments(alias.pop_comments()) 3142 3143 return table_alias 3144 3145 def _parse_subquery( 3146 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3147 ) -> t.Optional[exp.Subquery]: 3148 if not this: 3149 return None 3150 3151 return self.expression( 3152 exp.Subquery, 3153 this=this, 3154 pivots=self._parse_pivots(), 3155 alias=self._parse_table_alias() if parse_alias else None, 3156 sample=self._parse_table_sample(), 3157 ) 3158 3159 def _implicit_unnests_to_explicit(self, this: E) -> E: 3160 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3161 3162 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3163 for i, join in enumerate(this.args.get("joins") or []): 3164 table = join.this 3165 normalized_table = table.copy() 3166 normalized_table.meta["maybe_column"] = True 3167 normalized_table = _norm(normalized_table, dialect=self.dialect) 3168 3169 if isinstance(table, exp.Table) and not join.args.get("on"): 3170 if normalized_table.parts[0].name in refs: 3171 table_as_column = table.to_column() 3172 unnest = exp.Unnest(expressions=[table_as_column]) 3173 3174 # Table.to_column creates a parent Alias node that we want to convert to 3175 # a TableAlias and attach to the Unnest, so it matches the parser's output 3176 if isinstance(table.args.get("alias"), exp.TableAlias): 3177 table_as_column.replace(table_as_column.this) 3178 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3179 3180 table.replace(unnest) 3181 3182 refs.add(normalized_table.alias_or_name) 3183 3184 return this 3185 3186 def _parse_query_modifiers( 3187 self, this: t.Optional[exp.Expression] 3188 ) -> t.Optional[exp.Expression]: 3189 if isinstance(this, (exp.Query, exp.Table)): 3190 for join in self._parse_joins(): 3191 this.append("joins", join) 3192 for lateral in iter(self._parse_lateral, None): 3193 this.append("laterals", lateral) 3194 3195 while True: 3196 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3197 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3198 key, expression = parser(self) 3199 3200 if expression: 3201 this.set(key, expression) 3202 if key == "limit": 3203 offset = expression.args.pop("offset", None) 3204 3205 if offset: 3206 offset = exp.Offset(expression=offset) 3207 this.set("offset", offset) 3208 3209 limit_by_expressions = expression.expressions 3210 expression.set("expressions", None) 3211 offset.set("expressions", limit_by_expressions) 3212 continue 3213 break 3214 3215 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3216 this = self._implicit_unnests_to_explicit(this) 3217 3218 return this 3219 3220 def _parse_hint(self) -> t.Optional[exp.Hint]: 3221 if self._match(TokenType.HINT): 3222 hints = [] 3223 for hint in iter( 3224 lambda: self._parse_csv( 3225 lambda: self._parse_function() or self._parse_var(upper=True) 3226 ), 3227 [], 3228 ): 3229 hints.extend(hint) 3230 3231 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3232 self.raise_error("Expected */ after HINT") 3233 3234 return self.expression(exp.Hint, expressions=hints) 3235 3236 return None 3237 3238 def _parse_into(self) -> t.Optional[exp.Into]: 3239 if not self._match(TokenType.INTO): 3240 return None 3241 3242 temp = self._match(TokenType.TEMPORARY) 3243 unlogged = self._match_text_seq("UNLOGGED") 3244 self._match(TokenType.TABLE) 3245 3246 return self.expression( 3247 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3248 ) 
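# --- Illustrative example (editor's addition, not part of the sqlglot source). ---
# Sketch of _parse_into's output, assuming sqlglot's public API: in dialects that
# support SELECT ... INTO (e.g. T-SQL), the target lands in the Select's "into" arg.
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("SELECT * INTO new_tbl FROM t", read="tsql")
#     assert isinstance(ast.args["into"], exp.Into)
# ----------------------------------------------------------------------------------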
3249 3250 def _parse_from( 3251 self, joins: bool = False, skip_from_token: bool = False 3252 ) -> t.Optional[exp.From]: 3253 if not skip_from_token and not self._match(TokenType.FROM): 3254 return None 3255 3256 return self.expression( 3257 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3258 ) 3259 3260 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3261 return self.expression( 3262 exp.MatchRecognizeMeasure, 3263 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3264 this=self._parse_expression(), 3265 ) 3266 3267 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3268 if not self._match(TokenType.MATCH_RECOGNIZE): 3269 return None 3270 3271 self._match_l_paren() 3272 3273 partition = self._parse_partition_by() 3274 order = self._parse_order() 3275 3276 measures = ( 3277 self._parse_csv(self._parse_match_recognize_measure) 3278 if self._match_text_seq("MEASURES") 3279 else None 3280 ) 3281 3282 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3283 rows = exp.var("ONE ROW PER MATCH") 3284 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3285 text = "ALL ROWS PER MATCH" 3286 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3287 text += " SHOW EMPTY MATCHES" 3288 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3289 text += " OMIT EMPTY MATCHES" 3290 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3291 text += " WITH UNMATCHED ROWS" 3292 rows = exp.var(text) 3293 else: 3294 rows = None 3295 3296 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3297 text = "AFTER MATCH SKIP" 3298 if self._match_text_seq("PAST", "LAST", "ROW"): 3299 text += " PAST LAST ROW" 3300 elif self._match_text_seq("TO", "NEXT", "ROW"): 3301 text += " TO NEXT ROW" 3302 elif self._match_text_seq("TO", "FIRST"): 3303 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3304 elif self._match_text_seq("TO", "LAST"): 3305 text += f" TO LAST {self._advance_any().text}" # type: ignore 3306 after = exp.var(text) 3307 else: 3308 after = None 3309 3310 if self._match_text_seq("PATTERN"): 3311 self._match_l_paren() 3312 3313 if not self._curr: 3314 self.raise_error("Expecting )", self._curr) 3315 3316 paren = 1 3317 start = self._curr 3318 3319 while self._curr and paren > 0: 3320 if self._curr.token_type == TokenType.L_PAREN: 3321 paren += 1 3322 if self._curr.token_type == TokenType.R_PAREN: 3323 paren -= 1 3324 3325 end = self._prev 3326 self._advance() 3327 3328 if paren > 0: 3329 self.raise_error("Expecting )", self._curr) 3330 3331 pattern = exp.var(self._find_sql(start, end)) 3332 else: 3333 pattern = None 3334 3335 define = ( 3336 self._parse_csv(self._parse_name_as_expression) 3337 if self._match_text_seq("DEFINE") 3338 else None 3339 ) 3340 3341 self._match_r_paren() 3342 3343 return self.expression( 3344 exp.MatchRecognize, 3345 partition_by=partition, 3346 order=order, 3347 measures=measures, 3348 rows=rows, 3349 after=after, 3350 pattern=pattern, 3351 define=define, 3352 alias=self._parse_table_alias(), 3353 ) 3354 3355 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3356 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3357 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3358 cross_apply = False 3359 3360 if cross_apply is not None: 3361 this = self._parse_select(table=True) 3362 view = None 3363 outer = None 3364 elif self._match(TokenType.LATERAL): 3365 this = self._parse_select(table=True) 3366 view = 
self._match(TokenType.VIEW) 3367 outer = self._match(TokenType.OUTER) 3368 else: 3369 return None 3370 3371 if not this: 3372 this = ( 3373 self._parse_unnest() 3374 or self._parse_function() 3375 or self._parse_id_var(any_token=False) 3376 ) 3377 3378 while self._match(TokenType.DOT): 3379 this = exp.Dot( 3380 this=this, 3381 expression=self._parse_function() or self._parse_id_var(any_token=False), 3382 ) 3383 3384 if view: 3385 table = self._parse_id_var(any_token=False) 3386 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3387 table_alias: t.Optional[exp.TableAlias] = self.expression( 3388 exp.TableAlias, this=table, columns=columns 3389 ) 3390 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3391 # We move the alias from the lateral's child node to the lateral itself 3392 table_alias = this.args["alias"].pop() 3393 else: 3394 table_alias = self._parse_table_alias() 3395 3396 return self.expression( 3397 exp.Lateral, 3398 this=this, 3399 view=view, 3400 outer=outer, 3401 alias=table_alias, 3402 cross_apply=cross_apply, 3403 ) 3404 3405 def _parse_join_parts( 3406 self, 3407 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3408 return ( 3409 self._match_set(self.JOIN_METHODS) and self._prev, 3410 self._match_set(self.JOIN_SIDES) and self._prev, 3411 self._match_set(self.JOIN_KINDS) and self._prev, 3412 ) 3413 3414 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3415 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3416 this = self._parse_column() 3417 if isinstance(this, exp.Column): 3418 return this.this 3419 return this 3420 3421 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3422 3423 def _parse_join( 3424 self, skip_join_token: bool = False, parse_bracket: bool = False 3425 ) -> t.Optional[exp.Join]: 3426 if self._match(TokenType.COMMA): 3427 return self.expression(exp.Join, this=self._parse_table()) 3428 3429 index = self._index 3430 method, side, kind = self._parse_join_parts() 3431 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3432 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3433 3434 if not skip_join_token and not join: 3435 self._retreat(index) 3436 kind = None 3437 method = None 3438 side = None 3439 3440 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3441 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3442 3443 if not skip_join_token and not join and not outer_apply and not cross_apply: 3444 return None 3445 3446 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3447 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3448 kwargs["expressions"] = self._parse_csv( 3449 lambda: self._parse_table(parse_bracket=parse_bracket) 3450 ) 3451 3452 if method: 3453 kwargs["method"] = method.text 3454 if side: 3455 kwargs["side"] = side.text 3456 if kind: 3457 kwargs["kind"] = kind.text 3458 if hint: 3459 kwargs["hint"] = hint 3460 3461 if self._match(TokenType.MATCH_CONDITION): 3462 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3463 3464 if self._match(TokenType.ON): 3465 kwargs["on"] = self._parse_assignment() 3466 elif self._match(TokenType.USING): 3467 kwargs["using"] = self._parse_using_identifiers() 3468 elif ( 3469 not (outer_apply or cross_apply) 3470 and not isinstance(kwargs["this"], exp.Unnest) 3471 and not (kind and kind.token_type in 
(TokenType.CROSS, TokenType.ARRAY)) 3472 ): 3473 index = self._index 3474 joins: t.Optional[list] = list(self._parse_joins()) 3475 3476 if joins and self._match(TokenType.ON): 3477 kwargs["on"] = self._parse_assignment() 3478 elif joins and self._match(TokenType.USING): 3479 kwargs["using"] = self._parse_using_identifiers() 3480 else: 3481 joins = None 3482 self._retreat(index) 3483 3484 kwargs["this"].set("joins", joins if joins else None) 3485 3486 comments = [c for token in (method, side, kind) if token for c in token.comments] 3487 return self.expression(exp.Join, comments=comments, **kwargs) 3488 3489 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3490 this = self._parse_assignment() 3491 3492 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3493 return this 3494 3495 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3496 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3497 3498 return this 3499 3500 def _parse_index_params(self) -> exp.IndexParameters: 3501 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3502 3503 if self._match(TokenType.L_PAREN, advance=False): 3504 columns = self._parse_wrapped_csv(self._parse_with_operator) 3505 else: 3506 columns = None 3507 3508 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3509 partition_by = self._parse_partition_by() 3510 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3511 tablespace = ( 3512 self._parse_var(any_token=True) 3513 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3514 else None 3515 ) 3516 where = self._parse_where() 3517 3518 on = self._parse_field() if self._match(TokenType.ON) else None 3519 3520 return self.expression( 3521 exp.IndexParameters, 3522 using=using, 3523 columns=columns, 3524 include=include, 3525 partition_by=partition_by, 3526 where=where, 3527 with_storage=with_storage, 3528 tablespace=tablespace, 3529 on=on, 3530 ) 3531 3532 def _parse_index( 3533 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3534 ) -> t.Optional[exp.Index]: 3535 if index or anonymous: 3536 unique = None 3537 primary = None 3538 amp = None 3539 3540 self._match(TokenType.ON) 3541 self._match(TokenType.TABLE) # hive 3542 table = self._parse_table_parts(schema=True) 3543 else: 3544 unique = self._match(TokenType.UNIQUE) 3545 primary = self._match_text_seq("PRIMARY") 3546 amp = self._match_text_seq("AMP") 3547 3548 if not self._match(TokenType.INDEX): 3549 return None 3550 3551 index = self._parse_id_var() 3552 table = None 3553 3554 params = self._parse_index_params() 3555 3556 return self.expression( 3557 exp.Index, 3558 this=index, 3559 table=table, 3560 unique=unique, 3561 primary=primary, 3562 amp=amp, 3563 params=params, 3564 ) 3565 3566 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3567 hints: t.List[exp.Expression] = [] 3568 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3569 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3570 hints.append( 3571 self.expression( 3572 exp.WithTableHint, 3573 expressions=self._parse_csv( 3574 lambda: self._parse_function() or self._parse_var(any_token=True) 3575 ), 3576 ) 3577 ) 3578 self._match_r_paren() 3579 else: 3580 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3581 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3582 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3583 3584 
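# --- Illustrative example (editor's addition, not part of the sqlglot source). ---
# Sketch of the MySQL index-hint branch being parsed here, assuming sqlglot's
# public API: USE/FORCE/IGNORE INDEX hints are collected as exp.IndexTableHint
# nodes on the table's "hints" arg.
#
#     import sqlglot
#
#     ast = sqlglot.parse_one("SELECT * FROM t USE INDEX (i1)", read="mysql")
#     table = ast.args["from"].this
#     hints = table.args["hints"]  # a list with one exp.IndexTableHint
# ----------------------------------------------------------------------------------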
self._match_set((TokenType.INDEX, TokenType.KEY)) 3585 if self._match(TokenType.FOR): 3586 hint.set("target", self._advance_any() and self._prev.text.upper()) 3587 3588 hint.set("expressions", self._parse_wrapped_id_vars()) 3589 hints.append(hint) 3590 3591 return hints or None 3592 3593 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3594 return ( 3595 (not schema and self._parse_function(optional_parens=False)) 3596 or self._parse_id_var(any_token=False) 3597 or self._parse_string_as_identifier() 3598 or self._parse_placeholder() 3599 ) 3600 3601 def _parse_table_parts( 3602 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3603 ) -> exp.Table: 3604 catalog = None 3605 db = None 3606 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3607 3608 while self._match(TokenType.DOT): 3609 if catalog: 3610 # This allows nesting the table in arbitrarily many dot expressions if needed 3611 table = self.expression( 3612 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3613 ) 3614 else: 3615 catalog = db 3616 db = table 3617 # "" used for tsql FROM a..b case 3618 table = self._parse_table_part(schema=schema) or "" 3619 3620 if ( 3621 wildcard 3622 and self._is_connected() 3623 and (isinstance(table, exp.Identifier) or not table) 3624 and self._match(TokenType.STAR) 3625 ): 3626 if isinstance(table, exp.Identifier): 3627 table.args["this"] += "*" 3628 else: 3629 table = exp.Identifier(this="*") 3630 3631 # We bubble up comments from the Identifier to the Table 3632 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3633 3634 if is_db_reference: 3635 catalog = db 3636 db = table 3637 table = None 3638 3639 if not table and not is_db_reference: 3640 self.raise_error(f"Expected table name but got {self._curr}") 3641 if not db and is_db_reference: 3642 self.raise_error(f"Expected database name but got {self._curr}") 3643 3644 table = self.expression( 3645 exp.Table, 3646 comments=comments, 3647 this=table, 3648 db=db, 3649 catalog=catalog, 3650 ) 3651 3652 changes = self._parse_changes() 3653 if changes: 3654 table.set("changes", changes) 3655 3656 at_before = self._parse_historical_data() 3657 if at_before: 3658 table.set("when", at_before) 3659 3660 pivots = self._parse_pivots() 3661 if pivots: 3662 table.set("pivots", pivots) 3663 3664 return table 3665 3666 def _parse_table( 3667 self, 3668 schema: bool = False, 3669 joins: bool = False, 3670 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3671 parse_bracket: bool = False, 3672 is_db_reference: bool = False, 3673 parse_partition: bool = False, 3674 ) -> t.Optional[exp.Expression]: 3675 lateral = self._parse_lateral() 3676 if lateral: 3677 return lateral 3678 3679 unnest = self._parse_unnest() 3680 if unnest: 3681 return unnest 3682 3683 values = self._parse_derived_table_values() 3684 if values: 3685 return values 3686 3687 subquery = self._parse_select(table=True) 3688 if subquery: 3689 if not subquery.args.get("pivots"): 3690 subquery.set("pivots", self._parse_pivots()) 3691 return subquery 3692 3693 bracket = parse_bracket and self._parse_bracket(None) 3694 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3695 3696 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3697 self._parse_table 3698 ) 3699 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3700 3701 only = self._match(TokenType.ONLY) 3702 3703 this = 
t.cast( 3704 exp.Expression, 3705 bracket 3706 or rows_from 3707 or self._parse_bracket( 3708 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3709 ), 3710 ) 3711 3712 if only: 3713 this.set("only", only) 3714 3715 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3716 self._match_text_seq("*") 3717 3718 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3719 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3720 this.set("partition", self._parse_partition()) 3721 3722 if schema: 3723 return self._parse_schema(this=this) 3724 3725 version = self._parse_version() 3726 3727 if version: 3728 this.set("version", version) 3729 3730 if self.dialect.ALIAS_POST_TABLESAMPLE: 3731 this.set("sample", self._parse_table_sample()) 3732 3733 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3734 if alias: 3735 this.set("alias", alias) 3736 3737 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3738 return self.expression( 3739 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3740 ) 3741 3742 this.set("hints", self._parse_table_hints()) 3743 3744 if not this.args.get("pivots"): 3745 this.set("pivots", self._parse_pivots()) 3746 3747 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3748 this.set("sample", self._parse_table_sample()) 3749 3750 if joins: 3751 for join in self._parse_joins(): 3752 this.append("joins", join) 3753 3754 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3755 this.set("ordinality", True) 3756 this.set("alias", self._parse_table_alias()) 3757 3758 return this 3759 3760 def _parse_version(self) -> t.Optional[exp.Version]: 3761 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3762 this = "TIMESTAMP" 3763 elif self._match(TokenType.VERSION_SNAPSHOT): 3764 this = "VERSION" 3765 else: 3766 return None 3767 3768 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3769 kind = self._prev.text.upper() 3770 start = self._parse_bitwise() 3771 self._match_texts(("TO", "AND")) 3772 end = self._parse_bitwise() 3773 expression: t.Optional[exp.Expression] = self.expression( 3774 exp.Tuple, expressions=[start, end] 3775 ) 3776 elif self._match_text_seq("CONTAINED", "IN"): 3777 kind = "CONTAINED IN" 3778 expression = self.expression( 3779 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3780 ) 3781 elif self._match(TokenType.ALL): 3782 kind = "ALL" 3783 expression = None 3784 else: 3785 self._match_text_seq("AS", "OF") 3786 kind = "AS OF" 3787 expression = self._parse_type() 3788 3789 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3790 3791 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3792 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3793 index = self._index 3794 historical_data = None 3795 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3796 this = self._prev.text.upper() 3797 kind = ( 3798 self._match(TokenType.L_PAREN) 3799 and self._match_texts(self.HISTORICAL_DATA_KIND) 3800 and self._prev.text.upper() 3801 ) 3802 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3803 3804 if expression: 3805 self._match_r_paren() 3806 historical_data = self.expression( 3807 exp.HistoricalData, this=this, kind=kind, expression=expression 3808 ) 3809 else: 3810 self._retreat(index) 3811 3812 return historical_data 3813 3814 def _parse_changes(self) -> t.Optional[exp.Changes]: 3815 if not 
self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3816 return None 3817 3818 information = self._parse_var(any_token=True) 3819 self._match_r_paren() 3820 3821 return self.expression( 3822 exp.Changes, 3823 information=information, 3824 at_before=self._parse_historical_data(), 3825 end=self._parse_historical_data(), 3826 ) 3827 3828 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3829 if not self._match(TokenType.UNNEST): 3830 return None 3831 3832 expressions = self._parse_wrapped_csv(self._parse_equality) 3833 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3834 3835 alias = self._parse_table_alias() if with_alias else None 3836 3837 if alias: 3838 if self.dialect.UNNEST_COLUMN_ONLY: 3839 if alias.args.get("columns"): 3840 self.raise_error("Unexpected extra column alias in unnest.") 3841 3842 alias.set("columns", [alias.this]) 3843 alias.set("this", None) 3844 3845 columns = alias.args.get("columns") or [] 3846 if offset and len(expressions) < len(columns): 3847 offset = columns.pop() 3848 3849 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3850 self._match(TokenType.ALIAS) 3851 offset = self._parse_id_var( 3852 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3853 ) or exp.to_identifier("offset") 3854 3855 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3856 3857 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3858 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3859 if not is_derived and not ( 3860 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3861 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3862 ): 3863 return None 3864 3865 expressions = self._parse_csv(self._parse_value) 3866 alias = self._parse_table_alias() 3867 3868 if is_derived: 3869 self._match_r_paren() 3870 3871 return self.expression( 3872 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3873 ) 3874 3875 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3876 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3877 as_modifier and self._match_text_seq("USING", "SAMPLE") 3878 ): 3879 return None 3880 3881 bucket_numerator = None 3882 bucket_denominator = None 3883 bucket_field = None 3884 percent = None 3885 size = None 3886 seed = None 3887 3888 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3889 matched_l_paren = self._match(TokenType.L_PAREN) 3890 3891 if self.TABLESAMPLE_CSV: 3892 num = None 3893 expressions = self._parse_csv(self._parse_primary) 3894 else: 3895 expressions = None 3896 num = ( 3897 self._parse_factor() 3898 if self._match(TokenType.NUMBER, advance=False) 3899 else self._parse_primary() or self._parse_placeholder() 3900 ) 3901 3902 if self._match_text_seq("BUCKET"): 3903 bucket_numerator = self._parse_number() 3904 self._match_text_seq("OUT", "OF") 3905 bucket_denominator = bucket_denominator = self._parse_number() 3906 self._match(TokenType.ON) 3907 bucket_field = self._parse_field() 3908 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3909 percent = num 3910 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3911 size = num 3912 else: 3913 percent = num 3914 3915 if matched_l_paren: 3916 self._match_r_paren() 3917 3918 if self._match(TokenType.L_PAREN): 3919 method = self._parse_var(upper=True) 3920 seed = self._match(TokenType.COMMA) and self._parse_number() 3921 
self._match_r_paren() 3922 elif self._match_texts(("SEED", "REPEATABLE")): 3923 seed = self._parse_wrapped(self._parse_number) 3924 3925 if not method and self.DEFAULT_SAMPLING_METHOD: 3926 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3927 3928 return self.expression( 3929 exp.TableSample, 3930 expressions=expressions, 3931 method=method, 3932 bucket_numerator=bucket_numerator, 3933 bucket_denominator=bucket_denominator, 3934 bucket_field=bucket_field, 3935 percent=percent, 3936 size=size, 3937 seed=seed, 3938 ) 3939 3940 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3941 return list(iter(self._parse_pivot, None)) or None 3942 3943 def _parse_joins(self) -> t.Iterator[exp.Join]: 3944 return iter(self._parse_join, None) 3945 3946 # https://duckdb.org/docs/sql/statements/pivot 3947 def _parse_simplified_pivot(self) -> exp.Pivot: 3948 def _parse_on() -> t.Optional[exp.Expression]: 3949 this = self._parse_bitwise() 3950 return self._parse_in(this) if self._match(TokenType.IN) else this 3951 3952 this = self._parse_table() 3953 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3954 using = self._match(TokenType.USING) and self._parse_csv( 3955 lambda: self._parse_alias(self._parse_function()) 3956 ) 3957 group = self._parse_group() 3958 return self.expression( 3959 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3960 ) 3961 3962 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3963 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3964 this = self._parse_select_or_expression() 3965 3966 self._match(TokenType.ALIAS) 3967 alias = self._parse_bitwise() 3968 if alias: 3969 if isinstance(alias, exp.Column) and not alias.db: 3970 alias = alias.this 3971 return self.expression(exp.PivotAlias, this=this, alias=alias) 3972 3973 return this 3974 3975 value = self._parse_column() 3976 3977 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3978 self.raise_error("Expecting IN (") 3979 3980 if self._match(TokenType.ANY): 3981 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3982 else: 3983 exprs = self._parse_csv(_parse_aliased_expression) 3984 3985 self._match_r_paren() 3986 return self.expression(exp.In, this=value, expressions=exprs) 3987 3988 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3989 index = self._index 3990 include_nulls = None 3991 3992 if self._match(TokenType.PIVOT): 3993 unpivot = False 3994 elif self._match(TokenType.UNPIVOT): 3995 unpivot = True 3996 3997 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3998 if self._match_text_seq("INCLUDE", "NULLS"): 3999 include_nulls = True 4000 elif self._match_text_seq("EXCLUDE", "NULLS"): 4001 include_nulls = False 4002 else: 4003 return None 4004 4005 expressions = [] 4006 4007 if not self._match(TokenType.L_PAREN): 4008 self._retreat(index) 4009 return None 4010 4011 if unpivot: 4012 expressions = self._parse_csv(self._parse_column) 4013 else: 4014 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4015 4016 if not expressions: 4017 self.raise_error("Failed to parse PIVOT's aggregation list") 4018 4019 if not self._match(TokenType.FOR): 4020 self.raise_error("Expecting FOR") 4021 4022 field = self._parse_pivot_in() 4023 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4024 self._parse_bitwise 4025 ) 4026 4027 self._match_r_paren() 4028 4029 pivot = self.expression( 4030 exp.Pivot, 4031 expressions=expressions, 
4032 field=field, 4033 unpivot=unpivot, 4034 include_nulls=include_nulls, 4035 default_on_null=default_on_null, 4036 ) 4037 4038 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4039 pivot.set("alias", self._parse_table_alias()) 4040 4041 if not unpivot: 4042 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4043 4044 columns: t.List[exp.Expression] = [] 4045 for fld in pivot.args["field"].expressions: 4046 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4047 for name in names: 4048 if self.PREFIXED_PIVOT_COLUMNS: 4049 name = f"{name}_{field_name}" if name else field_name 4050 else: 4051 name = f"{field_name}_{name}" if name else field_name 4052 4053 columns.append(exp.to_identifier(name)) 4054 4055 pivot.set("columns", columns) 4056 4057 return pivot 4058 4059 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4060 return [agg.alias for agg in aggregations] 4061 4062 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4063 if not skip_where_token and not self._match(TokenType.PREWHERE): 4064 return None 4065 4066 return self.expression( 4067 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4068 ) 4069 4070 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4071 if not skip_where_token and not self._match(TokenType.WHERE): 4072 return None 4073 4074 return self.expression( 4075 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4076 ) 4077 4078 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4079 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4080 return None 4081 4082 elements: t.Dict[str, t.Any] = defaultdict(list) 4083 4084 if self._match(TokenType.ALL): 4085 elements["all"] = True 4086 elif self._match(TokenType.DISTINCT): 4087 elements["all"] = False 4088 4089 while True: 4090 index = self._index 4091 4092 elements["expressions"].extend( 4093 self._parse_csv( 4094 lambda: None 4095 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4096 else self._parse_assignment() 4097 ) 4098 ) 4099 4100 before_with_index = self._index 4101 with_prefix = self._match(TokenType.WITH) 4102 4103 if self._match(TokenType.ROLLUP): 4104 elements["rollup"].append( 4105 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4106 ) 4107 elif self._match(TokenType.CUBE): 4108 elements["cube"].append( 4109 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4110 ) 4111 elif self._match(TokenType.GROUPING_SETS): 4112 elements["grouping_sets"].append( 4113 self.expression( 4114 exp.GroupingSets, 4115 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4116 ) 4117 ) 4118 elif self._match_text_seq("TOTALS"): 4119 elements["totals"] = True # type: ignore 4120 4121 if before_with_index <= self._index <= before_with_index + 1: 4122 self._retreat(before_with_index) 4123 break 4124 4125 if index == self._index: 4126 break 4127 4128 return self.expression(exp.Group, **elements) # type: ignore 4129 4130 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4131 return self.expression( 4132 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4133 ) 4134 4135 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4136 if self._match(TokenType.L_PAREN): 4137 grouping_set = self._parse_csv(self._parse_column) 4138 
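# --- Illustrative example (editor's addition, not part of the sqlglot source). ---
# Sketch of _parse_group's output, assuming sqlglot's public API: plain grouping
# keys go into "expressions", while CUBE/ROLLUP/GROUPING SETS land in their own
# args on the exp.Group node.
#
#     import sqlglot
#
#     ast = sqlglot.parse_one("SELECT a, b FROM t GROUP BY GROUPING SETS ((a), (a, b))")
#     group = ast.args["group"]           # exp.Group
#     sets = group.args["grouping_sets"]  # [exp.GroupingSets]
# ----------------------------------------------------------------------------------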
self._match_r_paren() 4139 return self.expression(exp.Tuple, expressions=grouping_set) 4140 4141 return self._parse_column() 4142 4143 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4144 if not skip_having_token and not self._match(TokenType.HAVING): 4145 return None 4146 return self.expression(exp.Having, this=self._parse_assignment()) 4147 4148 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4149 if not self._match(TokenType.QUALIFY): 4150 return None 4151 return self.expression(exp.Qualify, this=self._parse_assignment()) 4152 4153 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4154 if skip_start_token: 4155 start = None 4156 elif self._match(TokenType.START_WITH): 4157 start = self._parse_assignment() 4158 else: 4159 return None 4160 4161 self._match(TokenType.CONNECT_BY) 4162 nocycle = self._match_text_seq("NOCYCLE") 4163 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4164 exp.Prior, this=self._parse_bitwise() 4165 ) 4166 connect = self._parse_assignment() 4167 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4168 4169 if not start and self._match(TokenType.START_WITH): 4170 start = self._parse_assignment() 4171 4172 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4173 4174 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4175 this = self._parse_id_var(any_token=True) 4176 if self._match(TokenType.ALIAS): 4177 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4178 return this 4179 4180 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4181 if self._match_text_seq("INTERPOLATE"): 4182 return self._parse_wrapped_csv(self._parse_name_as_expression) 4183 return None 4184 4185 def _parse_order( 4186 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4187 ) -> t.Optional[exp.Expression]: 4188 siblings = None 4189 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4190 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4191 return this 4192 4193 siblings = True 4194 4195 return self.expression( 4196 exp.Order, 4197 this=this, 4198 expressions=self._parse_csv(self._parse_ordered), 4199 siblings=siblings, 4200 ) 4201 4202 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4203 if not self._match(token): 4204 return None 4205 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4206 4207 def _parse_ordered( 4208 self, parse_method: t.Optional[t.Callable] = None 4209 ) -> t.Optional[exp.Ordered]: 4210 this = parse_method() if parse_method else self._parse_assignment() 4211 if not this: 4212 return None 4213 4214 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4215 this = exp.var("ALL") 4216 4217 asc = self._match(TokenType.ASC) 4218 desc = self._match(TokenType.DESC) or (asc and False) 4219 4220 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4221 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4222 4223 nulls_first = is_nulls_first or False 4224 explicitly_null_ordered = is_nulls_first or is_nulls_last 4225 4226 if ( 4227 not explicitly_null_ordered 4228 and ( 4229 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4230 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4231 ) 4232 and self.dialect.NULL_ORDERING != "nulls_are_last" 4233 ): 4234 nulls_first = True 4235 4236 if self._match_text_seq("WITH", "FILL"): 4237 with_fill = self.expression( 
4238 exp.WithFill, 4239 **{ # type: ignore 4240 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4241 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4242 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4243 "interpolate": self._parse_interpolate(), 4244 }, 4245 ) 4246 else: 4247 with_fill = None 4248 4249 return self.expression( 4250 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4251 ) 4252 4253 def _parse_limit( 4254 self, 4255 this: t.Optional[exp.Expression] = None, 4256 top: bool = False, 4257 skip_limit_token: bool = False, 4258 ) -> t.Optional[exp.Expression]: 4259 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4260 comments = self._prev_comments 4261 if top: 4262 limit_paren = self._match(TokenType.L_PAREN) 4263 expression = self._parse_term() if limit_paren else self._parse_number() 4264 4265 if limit_paren: 4266 self._match_r_paren() 4267 else: 4268 expression = self._parse_term() 4269 4270 if self._match(TokenType.COMMA): 4271 offset = expression 4272 expression = self._parse_term() 4273 else: 4274 offset = None 4275 4276 limit_exp = self.expression( 4277 exp.Limit, 4278 this=this, 4279 expression=expression, 4280 offset=offset, 4281 comments=comments, 4282 expressions=self._parse_limit_by(), 4283 ) 4284 4285 return limit_exp 4286 4287 if self._match(TokenType.FETCH): 4288 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4289 direction = self._prev.text.upper() if direction else "FIRST" 4290 4291 count = self._parse_field(tokens=self.FETCH_TOKENS) 4292 percent = self._match(TokenType.PERCENT) 4293 4294 self._match_set((TokenType.ROW, TokenType.ROWS)) 4295 4296 only = self._match_text_seq("ONLY") 4297 with_ties = self._match_text_seq("WITH", "TIES") 4298 4299 if only and with_ties: 4300 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4301 4302 return self.expression( 4303 exp.Fetch, 4304 direction=direction, 4305 count=count, 4306 percent=percent, 4307 with_ties=with_ties, 4308 ) 4309 4310 return this 4311 4312 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4313 if not self._match(TokenType.OFFSET): 4314 return this 4315 4316 count = self._parse_term() 4317 self._match_set((TokenType.ROW, TokenType.ROWS)) 4318 4319 return self.expression( 4320 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4321 ) 4322 4323 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4324 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4325 4326 def _parse_locks(self) -> t.List[exp.Lock]: 4327 locks = [] 4328 while True: 4329 if self._match_text_seq("FOR", "UPDATE"): 4330 update = True 4331 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4332 "LOCK", "IN", "SHARE", "MODE" 4333 ): 4334 update = False 4335 else: 4336 break 4337 4338 expressions = None 4339 if self._match_text_seq("OF"): 4340 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4341 4342 wait: t.Optional[bool | exp.Expression] = None 4343 if self._match_text_seq("NOWAIT"): 4344 wait = True 4345 elif self._match_text_seq("WAIT"): 4346 wait = self._parse_primary() 4347 elif self._match_text_seq("SKIP", "LOCKED"): 4348 wait = False 4349 4350 locks.append( 4351 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4352 ) 4353 4354 return locks 4355 4356 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> 
t.Optional[exp.Expression]: 4357 while this and self._match_set(self.SET_OPERATIONS): 4358 token_type = self._prev.token_type 4359 4360 if token_type == TokenType.UNION: 4361 operation: t.Type[exp.SetOperation] = exp.Union 4362 elif token_type == TokenType.EXCEPT: 4363 operation = exp.Except 4364 else: 4365 operation = exp.Intersect 4366 4367 comments = self._prev.comments 4368 4369 if self._match(TokenType.DISTINCT): 4370 distinct: t.Optional[bool] = True 4371 elif self._match(TokenType.ALL): 4372 distinct = False 4373 else: 4374 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4375 if distinct is None: 4376 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4377 4378 by_name = self._match_text_seq("BY", "NAME") 4379 expression = self._parse_select(nested=True, parse_set_operation=False) 4380 4381 this = self.expression( 4382 operation, 4383 comments=comments, 4384 this=this, 4385 distinct=distinct, 4386 by_name=by_name, 4387 expression=expression, 4388 ) 4389 4390 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4391 expression = this.expression 4392 4393 if expression: 4394 for arg in self.SET_OP_MODIFIERS: 4395 expr = expression.args.get(arg) 4396 if expr: 4397 this.set(arg, expr.pop()) 4398 4399 return this 4400 4401 def _parse_expression(self) -> t.Optional[exp.Expression]: 4402 return self._parse_alias(self._parse_assignment()) 4403 4404 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4405 this = self._parse_disjunction() 4406 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4407 # This allows us to parse <non-identifier token> := <expr> 4408 this = exp.column( 4409 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4410 ) 4411 4412 while self._match_set(self.ASSIGNMENT): 4413 if isinstance(this, exp.Column) and len(this.parts) == 1: 4414 this = this.this 4415 4416 this = self.expression( 4417 self.ASSIGNMENT[self._prev.token_type], 4418 this=this, 4419 comments=self._prev_comments, 4420 expression=self._parse_assignment(), 4421 ) 4422 4423 return this 4424 4425 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4426 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4427 4428 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4429 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4430 4431 def _parse_equality(self) -> t.Optional[exp.Expression]: 4432 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4433 4434 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4435 return self._parse_tokens(self._parse_range, self.COMPARISON) 4436 4437 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4438 this = this or self._parse_bitwise() 4439 negate = self._match(TokenType.NOT) 4440 4441 if self._match_set(self.RANGE_PARSERS): 4442 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4443 if not expression: 4444 return this 4445 4446 this = expression 4447 elif self._match(TokenType.ISNULL): 4448 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4449 4450 # Postgres supports ISNULL and NOTNULL for conditions. 
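# --- Illustrative example (editor's addition, not part of the sqlglot source). ---
# Sketch of _parse_set_operations (above), assuming sqlglot's public API: the set
# operator becomes the root of the tree, with the left query as "this" and the
# right query as "expression".
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("SELECT a FROM t UNION SELECT a FROM u")
#     assert isinstance(ast, exp.Union)
#     assert ast.args["distinct"]  # a bare UNION defaults to DISTINCT semantics
# ----------------------------------------------------------------------------------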
4451 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4452 if self._match(TokenType.NOTNULL): 4453 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4454 this = self.expression(exp.Not, this=this) 4455 4456 if negate: 4457 this = self._negate_range(this) 4458 4459 if self._match(TokenType.IS): 4460 this = self._parse_is(this) 4461 4462 return this 4463 4464 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4465 if not this: 4466 return this 4467 4468 return self.expression(exp.Not, this=this) 4469 4470 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4471 index = self._index - 1 4472 negate = self._match(TokenType.NOT) 4473 4474 if self._match_text_seq("DISTINCT", "FROM"): 4475 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4476 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4477 4478 if self._match(TokenType.JSON): 4479 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4480 4481 if self._match_text_seq("WITH"): 4482 _with = True 4483 elif self._match_text_seq("WITHOUT"): 4484 _with = False 4485 else: 4486 _with = None 4487 4488 unique = self._match(TokenType.UNIQUE) 4489 self._match_text_seq("KEYS") 4490 expression: t.Optional[exp.Expression] = self.expression( 4491 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4492 ) 4493 else: 4494 expression = self._parse_primary() or self._parse_null() 4495 if not expression: 4496 self._retreat(index) 4497 return None 4498 4499 this = self.expression(exp.Is, this=this, expression=expression) 4500 return self.expression(exp.Not, this=this) if negate else this 4501 4502 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4503 unnest = self._parse_unnest(with_alias=False) 4504 if unnest: 4505 this = self.expression(exp.In, this=this, unnest=unnest) 4506 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4507 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4508 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4509 4510 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4511 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4512 else: 4513 this = self.expression(exp.In, this=this, expressions=expressions) 4514 4515 if matched_l_paren: 4516 self._match_r_paren(this) 4517 elif not self._match(TokenType.R_BRACKET, expression=this): 4518 self.raise_error("Expecting ]") 4519 else: 4520 this = self.expression(exp.In, this=this, field=self._parse_column()) 4521 4522 return this 4523 4524 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4525 low = self._parse_bitwise() 4526 self._match(TokenType.AND) 4527 high = self._parse_bitwise() 4528 return self.expression(exp.Between, this=this, low=low, high=high) 4529 4530 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4531 if not self._match(TokenType.ESCAPE): 4532 return this 4533 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4534 4535 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4536 index = self._index 4537 4538 if not self._match(TokenType.INTERVAL) and match_interval: 4539 return None 4540 4541 if self._match(TokenType.STRING, advance=False): 4542 this = self._parse_primary() 4543 else: 4544 this = self._parse_term() 4545 4546 if not 
this or ( 4547 isinstance(this, exp.Column) 4548 and not this.table 4549 and not this.this.quoted 4550 and this.name.upper() == "IS" 4551 ): 4552 self._retreat(index) 4553 return None 4554 4555 unit = self._parse_function() or ( 4556 not self._match(TokenType.ALIAS, advance=False) 4557 and self._parse_var(any_token=True, upper=True) 4558 ) 4559 4560 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4561 # each INTERVAL expression into this canonical form so it's easy to transpile 4562 if this and this.is_number: 4563 this = exp.Literal.string(this.to_py()) 4564 elif this and this.is_string: 4565 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4566 if len(parts) == 1: 4567 if unit: 4568 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4569 self._retreat(self._index - 1) 4570 4571 this = exp.Literal.string(parts[0][0]) 4572 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4573 4574 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4575 unit = self.expression( 4576 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4577 ) 4578 4579 interval = self.expression(exp.Interval, this=this, unit=unit) 4580 4581 index = self._index 4582 self._match(TokenType.PLUS) 4583 4584 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4585 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4586 return self.expression( 4587 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4588 ) 4589 4590 self._retreat(index) 4591 return interval 4592 4593 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4594 this = self._parse_term() 4595 4596 while True: 4597 if self._match_set(self.BITWISE): 4598 this = self.expression( 4599 self.BITWISE[self._prev.token_type], 4600 this=this, 4601 expression=self._parse_term(), 4602 ) 4603 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4604 this = self.expression( 4605 exp.DPipe, 4606 this=this, 4607 expression=self._parse_term(), 4608 safe=not self.dialect.STRICT_STRING_CONCAT, 4609 ) 4610 elif self._match(TokenType.DQMARK): 4611 this = self.expression( 4612 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4613 ) 4614 elif self._match_pair(TokenType.LT, TokenType.LT): 4615 this = self.expression( 4616 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4617 ) 4618 elif self._match_pair(TokenType.GT, TokenType.GT): 4619 this = self.expression( 4620 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4621 ) 4622 else: 4623 break 4624 4625 return this 4626 4627 def _parse_term(self) -> t.Optional[exp.Expression]: 4628 this = self._parse_factor() 4629 4630 while self._match_set(self.TERM): 4631 klass = self.TERM[self._prev.token_type] 4632 comments = self._prev_comments 4633 expression = self._parse_factor() 4634 4635 this = self.expression(klass, this=this, comments=comments, expression=expression) 4636 4637 if isinstance(this, exp.Collate): 4638 expr = this.expression 4639 4640 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4641 # fallback to Identifier / Var 4642 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4643 ident = expr.this 4644 if isinstance(ident, exp.Identifier): 4645 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4646 4647 return this 4648 4649 def _parse_factor(self) -> t.Optional[exp.Expression]: 4650 parse_method = self._parse_exponent if 
self.EXPONENT else self._parse_unary
4651        this = parse_method()
4652
4653        while self._match_set(self.FACTOR):
4654            klass = self.FACTOR[self._prev.token_type]
4655            comments = self._prev_comments
4656            expression = parse_method()
4657
4658            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
4659                self._retreat(self._index - 1)
4660                return this
4661
4662            this = self.expression(klass, this=this, comments=comments, expression=expression)
4663
4664            if isinstance(this, exp.Div):
4665                this.args["typed"] = self.dialect.TYPED_DIVISION
4666                this.args["safe"] = self.dialect.SAFE_DIVISION
4667
4668        return this
4669
4670    def _parse_exponent(self) -> t.Optional[exp.Expression]:
4671        return self._parse_tokens(self._parse_unary, self.EXPONENT)
4672
4673    def _parse_unary(self) -> t.Optional[exp.Expression]:
4674        if self._match_set(self.UNARY_PARSERS):
4675            return self.UNARY_PARSERS[self._prev.token_type](self)
4676        return self._parse_at_time_zone(self._parse_type())
4677
4678    def _parse_type(
4679        self, parse_interval: bool = True, fallback_to_identifier: bool = False
4680    ) -> t.Optional[exp.Expression]:
4681        interval = parse_interval and self._parse_interval()
4682        if interval:
4683            return interval
4684
4685        index = self._index
4686        data_type = self._parse_types(check_func=True, allow_identifiers=False)
4687
4688        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
4689        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
4690        if isinstance(data_type, exp.Cast):
4691            # This constructor can contain ops directly after it, for instance struct unnesting:
4692            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
4693            return self._parse_column_ops(data_type)
4694
4695        if data_type:
4696            index2 = self._index
4697            this = self._parse_primary()
4698
4699            if isinstance(this, exp.Literal):
4700                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
4701                if parser:
4702                    return parser(self, this, data_type)
4703
4704                return self.expression(exp.Cast, this=this, to=data_type)
4705
4706            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
4707            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
4708            #
4709            # If the index difference here is greater than 1, that means the parser itself must have
4710            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
4711            #
4712            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
4713            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
4714            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
4715            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
4716            #
4717            # In these cases, we don't really want to return the converted type, but instead retreat
4718            # and try to parse a Column or Identifier in the section below.
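            # As a concrete illustration of the DECIMAL example above, a minimal
            # sketch using sqlglot's public API (the SQL input is illustrative,
            # not taken from this module):
            #
            #     >>> import sqlglot
            #     >>> from sqlglot import exp
            #     >>> cast = sqlglot.parse_one("SELECT CAST(a AS DECIMAL(38, 0)) FROM t").find(exp.Cast)
            #     >>> cast.to.sql()
            #     'DECIMAL(38, 0)'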
4719 if data_type.expressions and index2 - index > 1: 4720 self._retreat(index2) 4721 return self._parse_column_ops(data_type) 4722 4723 self._retreat(index) 4724 4725 if fallback_to_identifier: 4726 return self._parse_id_var() 4727 4728 this = self._parse_column() 4729 return this and self._parse_column_ops(this) 4730 4731 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4732 this = self._parse_type() 4733 if not this: 4734 return None 4735 4736 if isinstance(this, exp.Column) and not this.table: 4737 this = exp.var(this.name.upper()) 4738 4739 return self.expression( 4740 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4741 ) 4742 4743 def _parse_types( 4744 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4745 ) -> t.Optional[exp.Expression]: 4746 index = self._index 4747 4748 this: t.Optional[exp.Expression] = None 4749 prefix = self._match_text_seq("SYSUDTLIB", ".") 4750 4751 if not self._match_set(self.TYPE_TOKENS): 4752 identifier = allow_identifiers and self._parse_id_var( 4753 any_token=False, tokens=(TokenType.VAR,) 4754 ) 4755 if isinstance(identifier, exp.Identifier): 4756 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4757 4758 if len(tokens) != 1: 4759 self.raise_error("Unexpected identifier", self._prev) 4760 4761 if tokens[0].token_type in self.TYPE_TOKENS: 4762 self._prev = tokens[0] 4763 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4764 type_name = identifier.name 4765 4766 while self._match(TokenType.DOT): 4767 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4768 4769 this = exp.DataType.build(type_name, udt=True) 4770 else: 4771 self._retreat(self._index - 1) 4772 return None 4773 else: 4774 return None 4775 4776 type_token = self._prev.token_type 4777 4778 if type_token == TokenType.PSEUDO_TYPE: 4779 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4780 4781 if type_token == TokenType.OBJECT_IDENTIFIER: 4782 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4783 4784 # https://materialize.com/docs/sql/types/map/ 4785 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4786 key_type = self._parse_types( 4787 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4788 ) 4789 if not self._match(TokenType.FARROW): 4790 self._retreat(index) 4791 return None 4792 4793 value_type = self._parse_types( 4794 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4795 ) 4796 if not self._match(TokenType.R_BRACKET): 4797 self._retreat(index) 4798 return None 4799 4800 return exp.DataType( 4801 this=exp.DataType.Type.MAP, 4802 expressions=[key_type, value_type], 4803 nested=True, 4804 prefix=prefix, 4805 ) 4806 4807 nested = type_token in self.NESTED_TYPE_TOKENS 4808 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4809 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4810 expressions = None 4811 maybe_func = False 4812 4813 if self._match(TokenType.L_PAREN): 4814 if is_struct: 4815 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4816 elif nested: 4817 expressions = self._parse_csv( 4818 lambda: self._parse_types( 4819 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4820 ) 4821 ) 4822 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4823 this = expressions[0] 4824 this.set("nullable", True) 4825 self._match_r_paren() 4826 return this 4827 elif type_token in self.ENUM_TYPE_TOKENS: 4828 
expressions = self._parse_csv(self._parse_equality) 4829 elif is_aggregate: 4830 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4831 any_token=False, tokens=(TokenType.VAR,) 4832 ) 4833 if not func_or_ident or not self._match(TokenType.COMMA): 4834 return None 4835 expressions = self._parse_csv( 4836 lambda: self._parse_types( 4837 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4838 ) 4839 ) 4840 expressions.insert(0, func_or_ident) 4841 else: 4842 expressions = self._parse_csv(self._parse_type_size) 4843 4844 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4845 if type_token == TokenType.VECTOR and len(expressions) == 2: 4846 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4847 4848 if not expressions or not self._match(TokenType.R_PAREN): 4849 self._retreat(index) 4850 return None 4851 4852 maybe_func = True 4853 4854 values: t.Optional[t.List[exp.Expression]] = None 4855 4856 if nested and self._match(TokenType.LT): 4857 if is_struct: 4858 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4859 else: 4860 expressions = self._parse_csv( 4861 lambda: self._parse_types( 4862 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4863 ) 4864 ) 4865 4866 if not self._match(TokenType.GT): 4867 self.raise_error("Expecting >") 4868 4869 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4870 values = self._parse_csv(self._parse_assignment) 4871 if not values and is_struct: 4872 values = None 4873 self._retreat(self._index - 1) 4874 else: 4875 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4876 4877 if type_token in self.TIMESTAMPS: 4878 if self._match_text_seq("WITH", "TIME", "ZONE"): 4879 maybe_func = False 4880 tz_type = ( 4881 exp.DataType.Type.TIMETZ 4882 if type_token in self.TIMES 4883 else exp.DataType.Type.TIMESTAMPTZ 4884 ) 4885 this = exp.DataType(this=tz_type, expressions=expressions) 4886 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4887 maybe_func = False 4888 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4889 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4890 maybe_func = False 4891 elif type_token == TokenType.INTERVAL: 4892 unit = self._parse_var(upper=True) 4893 if unit: 4894 if self._match_text_seq("TO"): 4895 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4896 4897 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4898 else: 4899 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4900 4901 if maybe_func and check_func: 4902 index2 = self._index 4903 peek = self._parse_string() 4904 4905 if not peek: 4906 self._retreat(index) 4907 return None 4908 4909 self._retreat(index2) 4910 4911 if not this: 4912 if self._match_text_seq("UNSIGNED"): 4913 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4914 if not unsigned_type_token: 4915 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4916 4917 type_token = unsigned_type_token or type_token 4918 4919 this = exp.DataType( 4920 this=exp.DataType.Type[type_token.value], 4921 expressions=expressions, 4922 nested=nested, 4923 prefix=prefix, 4924 ) 4925 4926 # Empty arrays/structs are allowed 4927 if values is not None: 4928 cls = exp.Struct if is_struct else exp.Array 4929 this = exp.cast(cls(expressions=values), this, copy=False) 4930 4931 elif expressions: 4932 this.set("expressions", 
expressions) 4933 4934 # https://materialize.com/docs/sql/types/list/#type-name 4935 while self._match(TokenType.LIST): 4936 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4937 4938 index = self._index 4939 4940 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4941 matched_array = self._match(TokenType.ARRAY) 4942 4943 while self._curr: 4944 datatype_token = self._prev.token_type 4945 matched_l_bracket = self._match(TokenType.L_BRACKET) 4946 if not matched_l_bracket and not matched_array: 4947 break 4948 4949 matched_array = False 4950 values = self._parse_csv(self._parse_assignment) or None 4951 if ( 4952 values 4953 and not schema 4954 and ( 4955 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4956 ) 4957 ): 4958 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4959 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4960 self._retreat(index) 4961 break 4962 4963 this = exp.DataType( 4964 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4965 ) 4966 self._match(TokenType.R_BRACKET) 4967 4968 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4969 converter = self.TYPE_CONVERTERS.get(this.this) 4970 if converter: 4971 this = converter(t.cast(exp.DataType, this)) 4972 4973 return this 4974 4975 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4976 index = self._index 4977 4978 if ( 4979 self._curr 4980 and self._next 4981 and self._curr.token_type in self.TYPE_TOKENS 4982 and self._next.token_type in self.TYPE_TOKENS 4983 ): 4984 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4985 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4986 this = self._parse_id_var() 4987 else: 4988 this = ( 4989 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4990 or self._parse_id_var() 4991 ) 4992 4993 self._match(TokenType.COLON) 4994 4995 if ( 4996 type_required 4997 and not isinstance(this, exp.DataType) 4998 and not self._match_set(self.TYPE_TOKENS, advance=False) 4999 ): 5000 self._retreat(index) 5001 return self._parse_types() 5002 5003 return self._parse_column_def(this) 5004 5005 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5006 if not self._match_text_seq("AT", "TIME", "ZONE"): 5007 return this 5008 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5009 5010 def _parse_column(self) -> t.Optional[exp.Expression]: 5011 this = self._parse_column_reference() 5012 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5013 5014 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5015 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5016 5017 return column 5018 5019 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5020 this = self._parse_field() 5021 if ( 5022 not this 5023 and self._match(TokenType.VALUES, advance=False) 5024 and self.VALUES_FOLLOWED_BY_PAREN 5025 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5026 ): 5027 this = self._parse_id_var() 5028 5029 if isinstance(this, exp.Identifier): 5030 # We bubble up comments from the Identifier to the Column 5031 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5032 5033 return this 5034 5035 def _parse_colon_as_variant_extract( 5036 self, this: t.Optional[exp.Expression] 5037 ) -> t.Optional[exp.Expression]: 5038 casts = [] 5039 json_path = [] 5040 escape = None 5041 5042 while self._match(TokenType.COLON): 5043 start_index = self._index 5044 5045 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5046 path = self._parse_column_ops( 5047 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5048 ) 5049 5050 # The cast :: operator has a lower precedence than the extraction operator :, so 5051 # we rearrange the AST appropriately to avoid casting the JSON path 5052 while isinstance(path, exp.Cast): 5053 casts.append(path.to) 5054 path = path.this 5055 5056 if casts: 5057 dcolon_offset = next( 5058 i 5059 for i, t in enumerate(self._tokens[start_index:]) 5060 if t.token_type == TokenType.DCOLON 5061 ) 5062 end_token = self._tokens[start_index + dcolon_offset - 1] 5063 else: 5064 end_token = self._prev 5065 5066 if path: 5067 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5068 # it'll roundtrip to a string literal in GET_PATH 5069 if isinstance(path, exp.Identifier) and path.quoted: 5070 escape = True 5071 5072 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5073 5074 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5075 # Databricks transforms it back to the colon/dot notation 5076 if json_path: 5077 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5078 5079 if json_path_expr: 5080 json_path_expr.set("escape", escape) 5081 5082 this = self.expression( 5083 exp.JSONExtract, 5084 this=this, 5085 expression=json_path_expr, 5086 variant_extract=True, 5087 ) 5088 5089 while casts: 5090 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5091 5092 return this 5093 5094 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5095 return self._parse_types() 5096 5097 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5098 this = self._parse_bracket(this) 5099 5100 while self._match_set(self.COLUMN_OPERATORS): 5101 op_token = self._prev.token_type 5102 op = self.COLUMN_OPERATORS.get(op_token) 5103 5104 if op_token == TokenType.DCOLON: 5105 field = self._parse_dcolon() 5106 if not field: 5107 self.raise_error("Expected type") 5108 elif op and self._curr: 5109 field = self._parse_column_reference() or self._parse_bracket() 5110 else: 5111 field = self._parse_field(any_token=True, anonymous_func=True) 5112 5113 if isinstance(field, exp.Func) and this: 5114 # bigquery allows function calls like x.y.count(...) 5115 # SAFE.SUBSTR(...) 5116 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5117 this = exp.replace_tree( 5118 this, 5119 lambda n: ( 5120 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5121 if n.table 5122 else n.this 5123 ) 5124 if isinstance(n, exp.Column) 5125 else n, 5126 ) 5127 5128 if op: 5129 this = op(self, this, field) 5130 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5131 this = self.expression( 5132 exp.Column, 5133 comments=this.comments, 5134 this=field, 5135 table=this.this, 5136 db=this.args.get("table"), 5137 catalog=this.args.get("db"), 5138 ) 5139 else: 5140 this = self.expression(exp.Dot, this=this, expression=field) 5141 5142 if field and field.comments: 5143 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5144 5145 this = self._parse_bracket(this) 5146 5147 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5148 5149 def _parse_primary(self) -> t.Optional[exp.Expression]: 5150 if self._match_set(self.PRIMARY_PARSERS): 5151 token_type = self._prev.token_type 5152 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5153 5154 if token_type == TokenType.STRING: 5155 expressions = [primary] 5156 while self._match(TokenType.STRING): 5157 expressions.append(exp.Literal.string(self._prev.text)) 5158 5159 if len(expressions) > 1: 5160 return self.expression(exp.Concat, expressions=expressions) 5161 5162 return primary 5163 5164 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5165 return exp.Literal.number(f"0.{self._prev.text}") 5166 5167 if self._match(TokenType.L_PAREN): 5168 comments = self._prev_comments 5169 query = self._parse_select() 5170 5171 if query: 5172 expressions = [query] 5173 else: 5174 expressions = self._parse_expressions() 5175 5176 this = 
self._parse_query_modifiers(seq_get(expressions, 0)) 5177 5178 if not this and self._match(TokenType.R_PAREN, advance=False): 5179 this = self.expression(exp.Tuple) 5180 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5181 this = self._parse_subquery(this=this, parse_alias=False) 5182 elif isinstance(this, exp.Subquery): 5183 this = self._parse_subquery( 5184 this=self._parse_set_operations(this), parse_alias=False 5185 ) 5186 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5187 this = self.expression(exp.Tuple, expressions=expressions) 5188 else: 5189 this = self.expression(exp.Paren, this=this) 5190 5191 if this: 5192 this.add_comments(comments) 5193 5194 self._match_r_paren(expression=this) 5195 return this 5196 5197 return None 5198 5199 def _parse_field( 5200 self, 5201 any_token: bool = False, 5202 tokens: t.Optional[t.Collection[TokenType]] = None, 5203 anonymous_func: bool = False, 5204 ) -> t.Optional[exp.Expression]: 5205 if anonymous_func: 5206 field = ( 5207 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5208 or self._parse_primary() 5209 ) 5210 else: 5211 field = self._parse_primary() or self._parse_function( 5212 anonymous=anonymous_func, any_token=any_token 5213 ) 5214 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5215 5216 def _parse_function( 5217 self, 5218 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5219 anonymous: bool = False, 5220 optional_parens: bool = True, 5221 any_token: bool = False, 5222 ) -> t.Optional[exp.Expression]: 5223 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5224 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5225 fn_syntax = False 5226 if ( 5227 self._match(TokenType.L_BRACE, advance=False) 5228 and self._next 5229 and self._next.text.upper() == "FN" 5230 ): 5231 self._advance(2) 5232 fn_syntax = True 5233 5234 func = self._parse_function_call( 5235 functions=functions, 5236 anonymous=anonymous, 5237 optional_parens=optional_parens, 5238 any_token=any_token, 5239 ) 5240 5241 if fn_syntax: 5242 self._match(TokenType.R_BRACE) 5243 5244 return func 5245 5246 def _parse_function_call( 5247 self, 5248 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5249 anonymous: bool = False, 5250 optional_parens: bool = True, 5251 any_token: bool = False, 5252 ) -> t.Optional[exp.Expression]: 5253 if not self._curr: 5254 return None 5255 5256 comments = self._curr.comments 5257 token_type = self._curr.token_type 5258 this = self._curr.text 5259 upper = this.upper() 5260 5261 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5262 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5263 self._advance() 5264 return self._parse_window(parser(self)) 5265 5266 if not self._next or self._next.token_type != TokenType.L_PAREN: 5267 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5268 self._advance() 5269 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5270 5271 return None 5272 5273 if any_token: 5274 if token_type in self.RESERVED_TOKENS: 5275 return None 5276 elif token_type not in self.FUNC_TOKENS: 5277 return None 5278 5279 self._advance(2) 5280 5281 parser = self.FUNCTION_PARSERS.get(upper) 5282 if parser and not anonymous: 5283 this = parser(self) 5284 else: 5285 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5286 5287 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5288 this = self.expression( 5289 
subquery_predicate, comments=comments, this=self._parse_select() 5290 ) 5291 self._match_r_paren() 5292 return this 5293 5294 if functions is None: 5295 functions = self.FUNCTIONS 5296 5297 function = functions.get(upper) 5298 5299 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5300 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5301 5302 if alias: 5303 args = self._kv_to_prop_eq(args) 5304 5305 if function and not anonymous: 5306 if "dialect" in function.__code__.co_varnames: 5307 func = function(args, dialect=self.dialect) 5308 else: 5309 func = function(args) 5310 5311 func = self.validate_expression(func, args) 5312 if not self.dialect.NORMALIZE_FUNCTIONS: 5313 func.meta["name"] = this 5314 5315 this = func 5316 else: 5317 if token_type == TokenType.IDENTIFIER: 5318 this = exp.Identifier(this=this, quoted=True) 5319 this = self.expression(exp.Anonymous, this=this, expressions=args) 5320 5321 if isinstance(this, exp.Expression): 5322 this.add_comments(comments) 5323 5324 self._match_r_paren(this) 5325 return self._parse_window(this) 5326 5327 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5328 return expression 5329 5330 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5331 transformed = [] 5332 5333 for index, e in enumerate(expressions): 5334 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5335 if isinstance(e, exp.Alias): 5336 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5337 5338 if not isinstance(e, exp.PropertyEQ): 5339 e = self.expression( 5340 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5341 ) 5342 5343 if isinstance(e.this, exp.Column): 5344 e.this.replace(e.this.this) 5345 else: 5346 e = self._to_prop_eq(e, index) 5347 5348 transformed.append(e) 5349 5350 return transformed 5351 5352 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5353 return self._parse_statement() 5354 5355 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5356 return self._parse_column_def(self._parse_id_var()) 5357 5358 def _parse_user_defined_function( 5359 self, kind: t.Optional[TokenType] = None 5360 ) -> t.Optional[exp.Expression]: 5361 this = self._parse_id_var() 5362 5363 while self._match(TokenType.DOT): 5364 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5365 5366 if not self._match(TokenType.L_PAREN): 5367 return this 5368 5369 expressions = self._parse_csv(self._parse_function_parameter) 5370 self._match_r_paren() 5371 return self.expression( 5372 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5373 ) 5374 5375 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5376 literal = self._parse_primary() 5377 if literal: 5378 return self.expression(exp.Introducer, this=token.text, expression=literal) 5379 5380 return self.expression(exp.Identifier, this=token.text) 5381 5382 def _parse_session_parameter(self) -> exp.SessionParameter: 5383 kind = None 5384 this = self._parse_id_var() or self._parse_primary() 5385 5386 if this and self._match(TokenType.DOT): 5387 kind = this.name 5388 this = self._parse_var() or self._parse_primary() 5389 5390 return self.expression(exp.SessionParameter, this=this, kind=kind) 5391 5392 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5393 return self._parse_id_var() 5394 5395 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5396 index = self._index 
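        # A minimal sketch of the FUNCTIONS fallback in _parse_function_call above:
        # a name that is not a known function parses into exp.Anonymous (MY_UDF is
        # an illustrative, made-up name):
        #
        #     >>> import sqlglot
        #     >>> from sqlglot import exp
        #     >>> node = sqlglot.parse_one("SELECT MY_UDF(1, 2)").find(exp.Anonymous)
        #     >>> node.name
        #     'MY_UDF'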
5397 5398 if self._match(TokenType.L_PAREN): 5399 expressions = t.cast( 5400 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5401 ) 5402 5403 if not self._match(TokenType.R_PAREN): 5404 self._retreat(index) 5405 else: 5406 expressions = [self._parse_lambda_arg()] 5407 5408 if self._match_set(self.LAMBDAS): 5409 return self.LAMBDAS[self._prev.token_type](self, expressions) 5410 5411 self._retreat(index) 5412 5413 this: t.Optional[exp.Expression] 5414 5415 if self._match(TokenType.DISTINCT): 5416 this = self.expression( 5417 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5418 ) 5419 else: 5420 this = self._parse_select_or_expression(alias=alias) 5421 5422 return self._parse_limit( 5423 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5424 ) 5425 5426 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5427 index = self._index 5428 if not self._match(TokenType.L_PAREN): 5429 return this 5430 5431 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5432 # expr can be of both types 5433 if self._match_set(self.SELECT_START_TOKENS): 5434 self._retreat(index) 5435 return this 5436 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5437 self._match_r_paren() 5438 return self.expression(exp.Schema, this=this, expressions=args) 5439 5440 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5441 return self._parse_column_def(self._parse_field(any_token=True)) 5442 5443 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5444 # column defs are not really columns, they're identifiers 5445 if isinstance(this, exp.Column): 5446 this = this.this 5447 5448 kind = self._parse_types(schema=True) 5449 5450 if self._match_text_seq("FOR", "ORDINALITY"): 5451 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5452 5453 constraints: t.List[exp.Expression] = [] 5454 5455 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5456 ("ALIAS", "MATERIALIZED") 5457 ): 5458 persisted = self._prev.text.upper() == "MATERIALIZED" 5459 constraint_kind = exp.ComputedColumnConstraint( 5460 this=self._parse_assignment(), 5461 persisted=persisted or self._match_text_seq("PERSISTED"), 5462 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5463 ) 5464 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5465 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5466 self._match(TokenType.ALIAS) 5467 constraints.append( 5468 self.expression( 5469 exp.ColumnConstraint, 5470 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5471 ) 5472 ) 5473 5474 while True: 5475 constraint = self._parse_column_constraint() 5476 if not constraint: 5477 break 5478 constraints.append(constraint) 5479 5480 if not kind and not constraints: 5481 return this 5482 5483 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5484 5485 def _parse_auto_increment( 5486 self, 5487 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5488 start = None 5489 increment = None 5490 5491 if self._match(TokenType.L_PAREN, advance=False): 5492 args = self._parse_wrapped_csv(self._parse_bitwise) 5493 start = seq_get(args, 0) 5494 increment = seq_get(args, 1) 5495 elif self._match_text_seq("START"): 5496 start = self._parse_bitwise() 5497 self._match_text_seq("INCREMENT") 5498 
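        # A quick sketch of _parse_column_def above, via the public API
        # (illustrative DDL input):
        #
        #     >>> import sqlglot
        #     >>> from sqlglot import exp
        #     >>> col = sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL)").find(exp.ColumnDef)
        #     >>> col.args["kind"].sql(), len(col.args["constraints"])
        #     ('INT', 1)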
increment = self._parse_bitwise() 5499 5500 if start and increment: 5501 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5502 5503 return exp.AutoIncrementColumnConstraint() 5504 5505 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5506 if not self._match_text_seq("REFRESH"): 5507 self._retreat(self._index - 1) 5508 return None 5509 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5510 5511 def _parse_compress(self) -> exp.CompressColumnConstraint: 5512 if self._match(TokenType.L_PAREN, advance=False): 5513 return self.expression( 5514 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5515 ) 5516 5517 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5518 5519 def _parse_generated_as_identity( 5520 self, 5521 ) -> ( 5522 exp.GeneratedAsIdentityColumnConstraint 5523 | exp.ComputedColumnConstraint 5524 | exp.GeneratedAsRowColumnConstraint 5525 ): 5526 if self._match_text_seq("BY", "DEFAULT"): 5527 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5528 this = self.expression( 5529 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5530 ) 5531 else: 5532 self._match_text_seq("ALWAYS") 5533 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5534 5535 self._match(TokenType.ALIAS) 5536 5537 if self._match_text_seq("ROW"): 5538 start = self._match_text_seq("START") 5539 if not start: 5540 self._match(TokenType.END) 5541 hidden = self._match_text_seq("HIDDEN") 5542 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5543 5544 identity = self._match_text_seq("IDENTITY") 5545 5546 if self._match(TokenType.L_PAREN): 5547 if self._match(TokenType.START_WITH): 5548 this.set("start", self._parse_bitwise()) 5549 if self._match_text_seq("INCREMENT", "BY"): 5550 this.set("increment", self._parse_bitwise()) 5551 if self._match_text_seq("MINVALUE"): 5552 this.set("minvalue", self._parse_bitwise()) 5553 if self._match_text_seq("MAXVALUE"): 5554 this.set("maxvalue", self._parse_bitwise()) 5555 5556 if self._match_text_seq("CYCLE"): 5557 this.set("cycle", True) 5558 elif self._match_text_seq("NO", "CYCLE"): 5559 this.set("cycle", False) 5560 5561 if not identity: 5562 this.set("expression", self._parse_range()) 5563 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5564 args = self._parse_csv(self._parse_bitwise) 5565 this.set("start", seq_get(args, 0)) 5566 this.set("increment", seq_get(args, 1)) 5567 5568 self._match_r_paren() 5569 5570 return this 5571 5572 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5573 self._match_text_seq("LENGTH") 5574 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5575 5576 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5577 if self._match_text_seq("NULL"): 5578 return self.expression(exp.NotNullColumnConstraint) 5579 if self._match_text_seq("CASESPECIFIC"): 5580 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5581 if self._match_text_seq("FOR", "REPLICATION"): 5582 return self.expression(exp.NotForReplicationColumnConstraint) 5583 5584 # Unconsume the `NOT` token 5585 self._retreat(self._index - 1) 5586 return None 5587 5588 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5589 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5590 5591 procedure_option_follows = ( 5592 self._match(TokenType.WITH, 
advance=False) 5593 and self._next 5594 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5595 ) 5596 5597 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5598 return self.expression( 5599 exp.ColumnConstraint, 5600 this=this, 5601 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5602 ) 5603 5604 return this 5605 5606 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5607 if not self._match(TokenType.CONSTRAINT): 5608 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5609 5610 return self.expression( 5611 exp.Constraint, 5612 this=self._parse_id_var(), 5613 expressions=self._parse_unnamed_constraints(), 5614 ) 5615 5616 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5617 constraints = [] 5618 while True: 5619 constraint = self._parse_unnamed_constraint() or self._parse_function() 5620 if not constraint: 5621 break 5622 constraints.append(constraint) 5623 5624 return constraints 5625 5626 def _parse_unnamed_constraint( 5627 self, constraints: t.Optional[t.Collection[str]] = None 5628 ) -> t.Optional[exp.Expression]: 5629 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5630 constraints or self.CONSTRAINT_PARSERS 5631 ): 5632 return None 5633 5634 constraint = self._prev.text.upper() 5635 if constraint not in self.CONSTRAINT_PARSERS: 5636 self.raise_error(f"No parser found for schema constraint {constraint}.") 5637 5638 return self.CONSTRAINT_PARSERS[constraint](self) 5639 5640 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5641 return self._parse_id_var(any_token=False) 5642 5643 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5644 self._match_text_seq("KEY") 5645 return self.expression( 5646 exp.UniqueColumnConstraint, 5647 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5648 this=self._parse_schema(self._parse_unique_key()), 5649 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5650 on_conflict=self._parse_on_conflict(), 5651 ) 5652 5653 def _parse_key_constraint_options(self) -> t.List[str]: 5654 options = [] 5655 while True: 5656 if not self._curr: 5657 break 5658 5659 if self._match(TokenType.ON): 5660 action = None 5661 on = self._advance_any() and self._prev.text 5662 5663 if self._match_text_seq("NO", "ACTION"): 5664 action = "NO ACTION" 5665 elif self._match_text_seq("CASCADE"): 5666 action = "CASCADE" 5667 elif self._match_text_seq("RESTRICT"): 5668 action = "RESTRICT" 5669 elif self._match_pair(TokenType.SET, TokenType.NULL): 5670 action = "SET NULL" 5671 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5672 action = "SET DEFAULT" 5673 else: 5674 self.raise_error("Invalid key constraint") 5675 5676 options.append(f"ON {on} {action}") 5677 else: 5678 var = self._parse_var_from_options( 5679 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5680 ) 5681 if not var: 5682 break 5683 options.append(var.name) 5684 5685 return options 5686 5687 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5688 if match and not self._match(TokenType.REFERENCES): 5689 return None 5690 5691 expressions = None 5692 this = self._parse_table(schema=True) 5693 options = self._parse_key_constraint_options() 5694 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5695 5696 def _parse_foreign_key(self) -> exp.ForeignKey: 5697 expressions = self._parse_wrapped_id_vars() 5698 reference = self._parse_references() 5699 options = {} 5700 5701 
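        # A sketch of the ON DELETE / ON UPDATE handling that follows
        # (illustrative DDL input):
        #
        #     >>> import sqlglot
        #     >>> from sqlglot import exp
        #     >>> sql = "CREATE TABLE t (a INT, FOREIGN KEY (a) REFERENCES p (id) ON DELETE CASCADE)"
        #     >>> sqlglot.parse_one(sql).find(exp.ForeignKey).args["delete"]
        #     'CASCADE'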
while self._match(TokenType.ON): 5702 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5703 self.raise_error("Expected DELETE or UPDATE") 5704 5705 kind = self._prev.text.lower() 5706 5707 if self._match_text_seq("NO", "ACTION"): 5708 action = "NO ACTION" 5709 elif self._match(TokenType.SET): 5710 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5711 action = "SET " + self._prev.text.upper() 5712 else: 5713 self._advance() 5714 action = self._prev.text.upper() 5715 5716 options[kind] = action 5717 5718 return self.expression( 5719 exp.ForeignKey, 5720 expressions=expressions, 5721 reference=reference, 5722 **options, # type: ignore 5723 ) 5724 5725 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5726 return self._parse_field() 5727 5728 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5729 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5730 self._retreat(self._index - 1) 5731 return None 5732 5733 id_vars = self._parse_wrapped_id_vars() 5734 return self.expression( 5735 exp.PeriodForSystemTimeConstraint, 5736 this=seq_get(id_vars, 0), 5737 expression=seq_get(id_vars, 1), 5738 ) 5739 5740 def _parse_primary_key( 5741 self, wrapped_optional: bool = False, in_props: bool = False 5742 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5743 desc = ( 5744 self._match_set((TokenType.ASC, TokenType.DESC)) 5745 and self._prev.token_type == TokenType.DESC 5746 ) 5747 5748 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5749 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5750 5751 expressions = self._parse_wrapped_csv( 5752 self._parse_primary_key_part, optional=wrapped_optional 5753 ) 5754 options = self._parse_key_constraint_options() 5755 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5756 5757 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5758 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5759 5760 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5761 """ 5762 Parses a datetime column in ODBC format. We parse the column into the corresponding 5763 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5764 same as we did for `DATE('yyyy-mm-dd')`. 
5765 5766 Reference: 5767 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5768 """ 5769 self._match(TokenType.VAR) 5770 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5771 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5772 if not self._match(TokenType.R_BRACE): 5773 self.raise_error("Expected }") 5774 return expression 5775 5776 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5777 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5778 return this 5779 5780 bracket_kind = self._prev.token_type 5781 if ( 5782 bracket_kind == TokenType.L_BRACE 5783 and self._curr 5784 and self._curr.token_type == TokenType.VAR 5785 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5786 ): 5787 return self._parse_odbc_datetime_literal() 5788 5789 expressions = self._parse_csv( 5790 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5791 ) 5792 5793 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5794 self.raise_error("Expected ]") 5795 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5796 self.raise_error("Expected }") 5797 5798 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5799 if bracket_kind == TokenType.L_BRACE: 5800 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5801 elif not this: 5802 this = build_array_constructor( 5803 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5804 ) 5805 else: 5806 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5807 if constructor_type: 5808 return build_array_constructor( 5809 constructor_type, 5810 args=expressions, 5811 bracket_kind=bracket_kind, 5812 dialect=self.dialect, 5813 ) 5814 5815 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5816 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5817 5818 self._add_comments(this) 5819 return self._parse_bracket(this) 5820 5821 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5822 if self._match(TokenType.COLON): 5823 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5824 return this 5825 5826 def _parse_case(self) -> t.Optional[exp.Expression]: 5827 ifs = [] 5828 default = None 5829 5830 comments = self._prev_comments 5831 expression = self._parse_assignment() 5832 5833 while self._match(TokenType.WHEN): 5834 this = self._parse_assignment() 5835 self._match(TokenType.THEN) 5836 then = self._parse_assignment() 5837 ifs.append(self.expression(exp.If, this=this, true=then)) 5838 5839 if self._match(TokenType.ELSE): 5840 default = self._parse_assignment() 5841 5842 if not self._match(TokenType.END): 5843 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5844 default = exp.column("interval") 5845 else: 5846 self.raise_error("Expected END after CASE", self._prev) 5847 5848 return self.expression( 5849 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5850 ) 5851 5852 def _parse_if(self) -> t.Optional[exp.Expression]: 5853 if self._match(TokenType.L_PAREN): 5854 args = self._parse_csv(self._parse_assignment) 5855 this = self.validate_expression(exp.If.from_arg_list(args), args) 5856 self._match_r_paren() 5857 else: 5858 index = self._index - 1 5859 5860 if self.NO_PAREN_IF_COMMANDS and index == 0: 5861 
return self._parse_as_command(self._prev)
5862
5863            condition = self._parse_assignment()
5864
5865            if not condition:
5866                self._retreat(index)
5867                return None
5868
5869            self._match(TokenType.THEN)
5870            true = self._parse_assignment()
5871            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
5872            self._match(TokenType.END)
5873            this = self.expression(exp.If, this=condition, true=true, false=false)
5874
5875        return this
5876
5877    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
5878        if not self._match_text_seq("VALUE", "FOR"):
5879            self._retreat(self._index - 1)
5880            return None
5881
5882        return self.expression(
5883            exp.NextValueFor,
5884            this=self._parse_column(),
5885            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
5886        )
5887
5888    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
5889        this = self._parse_function() or self._parse_var_or_string(upper=True)
5890
5891        if self._match(TokenType.FROM):
5892            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
5893
5894        if not self._match(TokenType.COMMA):
5895            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
5896
5897        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
5898
5899    def _parse_gap_fill(self) -> exp.GapFill:
5900        self._match(TokenType.TABLE)
5901        this = self._parse_table()
5902
5903        self._match(TokenType.COMMA)
5904        args = [this, *self._parse_csv(self._parse_lambda)]
5905
5906        gap_fill = exp.GapFill.from_arg_list(args)
5907        return self.validate_expression(gap_fill, args)
5908
5909    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
5910        this = self._parse_assignment()
5911
5912        if not self._match(TokenType.ALIAS):
5913            if self._match(TokenType.COMMA):
5914                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())
5915
5916            self.raise_error("Expected AS after CAST")
5917
5918        fmt = None
5919        to = self._parse_types()
5920
5921        if self._match(TokenType.FORMAT):
5922            fmt_string = self._parse_string()
5923            fmt = self._parse_at_time_zone(fmt_string)
5924
5925            if not to:
5926                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
5927            if to.this in exp.DataType.TEMPORAL_TYPES:
5928                this = self.expression(
5929                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
5930                    this=this,
5931                    format=exp.Literal.string(
5932                        format_time(
5933                            fmt_string.this if fmt_string else "",
5934                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
5935                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
5936                        )
5937                    ),
5938                    safe=safe,
5939                )
5940
5941                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
5942                    this.set("zone", fmt.args["zone"])
5943                return this
5944        elif not to:
5945            self.raise_error("Expected TYPE after CAST")
5946        elif isinstance(to, exp.Identifier):
5947            to = exp.DataType.build(to.name, udt=True)
5948        elif to.this == exp.DataType.Type.CHAR:
5949            if self._match(TokenType.CHARACTER_SET):
5950                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
5951
5952        return self.expression(
5953            exp.Cast if strict else exp.TryCast,
5954            this=this,
5955            to=to,
5956            format=fmt,
5957            safe=safe,
5958            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
5959        )
5960
5961    def _parse_string_agg(self) -> exp.GroupConcat:
5962        if self._match(TokenType.DISTINCT):
5963            args: t.List[t.Optional[exp.Expression]] = [
5964                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
5965 ] 5966 if self._match(TokenType.COMMA): 5967 args.extend(self._parse_csv(self._parse_assignment)) 5968 else: 5969 args = self._parse_csv(self._parse_assignment) # type: ignore 5970 5971 if self._match_text_seq("ON", "OVERFLOW"): 5972 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 5973 if self._match_text_seq("ERROR"): 5974 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 5975 else: 5976 self._match_text_seq("TRUNCATE") 5977 on_overflow = self.expression( 5978 exp.OverflowTruncateBehavior, 5979 this=self._parse_string(), 5980 with_count=( 5981 self._match_text_seq("WITH", "COUNT") 5982 or not self._match_text_seq("WITHOUT", "COUNT") 5983 ), 5984 ) 5985 else: 5986 on_overflow = None 5987 5988 index = self._index 5989 if not self._match(TokenType.R_PAREN) and args: 5990 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5991 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5992 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5993 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5994 5995 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5996 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5997 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5998 if not self._match_text_seq("WITHIN", "GROUP"): 5999 self._retreat(index) 6000 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6001 6002 # The corresponding match_r_paren will be called in parse_function (caller) 6003 self._match_l_paren() 6004 6005 return self.expression( 6006 exp.GroupConcat, 6007 this=self._parse_order(this=seq_get(args, 0)), 6008 separator=seq_get(args, 1), 6009 on_overflow=on_overflow, 6010 ) 6011 6012 def _parse_convert( 6013 self, strict: bool, safe: t.Optional[bool] = None 6014 ) -> t.Optional[exp.Expression]: 6015 this = self._parse_bitwise() 6016 6017 if self._match(TokenType.USING): 6018 to: t.Optional[exp.Expression] = self.expression( 6019 exp.CharacterSet, this=self._parse_var() 6020 ) 6021 elif self._match(TokenType.COMMA): 6022 to = self._parse_types() 6023 else: 6024 to = None 6025 6026 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6027 6028 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6029 """ 6030 There are generally two variants of the DECODE function: 6031 6032 - DECODE(bin, charset) 6033 - DECODE(expression, search, result [, search, result] ... [, default]) 6034 6035 The second variant will always be parsed into a CASE expression. Note that NULL 6036 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6037 instead of relying on pattern matching. 
6038        """
6039        args = self._parse_csv(self._parse_assignment)
6040
6041        if len(args) < 3:
6042            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))
6043
6044        expression, *expressions = args
6045        if not expression:
6046            return None
6047
6048        ifs = []
6049        for search, result in zip(expressions[::2], expressions[1::2]):
6050            if not search or not result:
6051                return None
6052
6053            if isinstance(search, exp.Literal):
6054                ifs.append(
6055                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
6056                )
6057            elif isinstance(search, exp.Null):
6058                ifs.append(
6059                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
6060                )
6061            else:
6062                cond = exp.or_(
6063                    exp.EQ(this=expression.copy(), expression=search),
6064                    exp.and_(
6065                        exp.Is(this=expression.copy(), expression=exp.Null()),
6066                        exp.Is(this=search.copy(), expression=exp.Null()),
6067                        copy=False,
6068                    ),
6069                    copy=False,
6070                )
6071                ifs.append(exp.If(this=cond, true=result))
6072
6073        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
6074
6075    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
6076        self._match_text_seq("KEY")
6077        key = self._parse_column()
6078        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
6079        self._match_text_seq("VALUE")
6080        value = self._parse_bitwise()
6081
6082        if not key and not value:
6083            return None
6084        return self.expression(exp.JSONKeyValue, this=key, expression=value)
6085
6086    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
6087        if not this or not self._match_text_seq("FORMAT", "JSON"):
6088            return this
6089
6090        return self.expression(exp.FormatJson, this=this)
6091
6092    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
6093        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
6094        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
6095            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
6096            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
6097        else:
6098            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
6099            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
6100
6101        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)
6102
6103        if not empty and not error and not null:
6104            return None
6105
6106        return self.expression(
6107            exp.OnCondition,
6108            empty=empty,
6109            error=error,
6110            null=null,
6111        )
6112
6113    def _parse_on_handling(
6114        self, on: str, *values: str
6115    ) -> t.Optional[str] | t.Optional[exp.Expression]:
6116        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
6117        for value in values:
6118            if self._match_text_seq(value, "ON", on):
6119                return f"{value} ON {on}"
6120
6121        index = self._index
6122        if self._match(TokenType.DEFAULT):
6123            default_value = self._parse_bitwise()
6124            if self._match_text_seq("ON", on):
6125                return default_value
6126
6127        self._retreat(index)
6128
6129        return None
6130
6131    @t.overload
6132    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...
6133
6134    @t.overload
6135    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
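    # A short sketch of the DECODE -> CASE canonicalization documented in
    # _parse_decode above (illustrative, Oracle-flavored input):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.transpile("SELECT DECODE(x, 1, 'one', 'other') FROM t", read="oracle")[0]
    #     "SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END FROM t"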
6136 6137 def _parse_json_object(self, agg=False): 6138 star = self._parse_star() 6139 expressions = ( 6140 [star] 6141 if star 6142 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6143 ) 6144 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6145 6146 unique_keys = None 6147 if self._match_text_seq("WITH", "UNIQUE"): 6148 unique_keys = True 6149 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6150 unique_keys = False 6151 6152 self._match_text_seq("KEYS") 6153 6154 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6155 self._parse_type() 6156 ) 6157 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6158 6159 return self.expression( 6160 exp.JSONObjectAgg if agg else exp.JSONObject, 6161 expressions=expressions, 6162 null_handling=null_handling, 6163 unique_keys=unique_keys, 6164 return_type=return_type, 6165 encoding=encoding, 6166 ) 6167 6168 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6169 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6170 if not self._match_text_seq("NESTED"): 6171 this = self._parse_id_var() 6172 kind = self._parse_types(allow_identifiers=False) 6173 nested = None 6174 else: 6175 this = None 6176 kind = None 6177 nested = True 6178 6179 path = self._match_text_seq("PATH") and self._parse_string() 6180 nested_schema = nested and self._parse_json_schema() 6181 6182 return self.expression( 6183 exp.JSONColumnDef, 6184 this=this, 6185 kind=kind, 6186 path=path, 6187 nested_schema=nested_schema, 6188 ) 6189 6190 def _parse_json_schema(self) -> exp.JSONSchema: 6191 self._match_text_seq("COLUMNS") 6192 return self.expression( 6193 exp.JSONSchema, 6194 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6195 ) 6196 6197 def _parse_json_table(self) -> exp.JSONTable: 6198 this = self._parse_format_json(self._parse_bitwise()) 6199 path = self._match(TokenType.COMMA) and self._parse_string() 6200 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6201 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6202 schema = self._parse_json_schema() 6203 6204 return exp.JSONTable( 6205 this=this, 6206 schema=schema, 6207 path=path, 6208 error_handling=error_handling, 6209 empty_handling=empty_handling, 6210 ) 6211 6212 def _parse_match_against(self) -> exp.MatchAgainst: 6213 expressions = self._parse_csv(self._parse_column) 6214 6215 self._match_text_seq(")", "AGAINST", "(") 6216 6217 this = self._parse_string() 6218 6219 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6220 modifier = "IN NATURAL LANGUAGE MODE" 6221 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6222 modifier = f"{modifier} WITH QUERY EXPANSION" 6223 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6224 modifier = "IN BOOLEAN MODE" 6225 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6226 modifier = "WITH QUERY EXPANSION" 6227 else: 6228 modifier = None 6229 6230 return self.expression( 6231 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6232 ) 6233 6234 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6235 def _parse_open_json(self) -> exp.OpenJSON: 6236 this = self._parse_bitwise() 6237 path = self._match(TokenType.COMMA) and self._parse_string() 6238 6239 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6240 this = self._parse_field(any_token=True) 6241 kind = self._parse_types() 6242 path = 
self._parse_string() 6243 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6244 6245 return self.expression( 6246 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6247 ) 6248 6249 expressions = None 6250 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6251 self._match_l_paren() 6252 expressions = self._parse_csv(_parse_open_json_column_def) 6253 6254 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6255 6256 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6257 args = self._parse_csv(self._parse_bitwise) 6258 6259 if self._match(TokenType.IN): 6260 return self.expression( 6261 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6262 ) 6263 6264 if haystack_first: 6265 haystack = seq_get(args, 0) 6266 needle = seq_get(args, 1) 6267 else: 6268 needle = seq_get(args, 0) 6269 haystack = seq_get(args, 1) 6270 6271 return self.expression( 6272 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6273 ) 6274 6275 def _parse_predict(self) -> exp.Predict: 6276 self._match_text_seq("MODEL") 6277 this = self._parse_table() 6278 6279 self._match(TokenType.COMMA) 6280 self._match_text_seq("TABLE") 6281 6282 return self.expression( 6283 exp.Predict, 6284 this=this, 6285 expression=self._parse_table(), 6286 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6287 ) 6288 6289 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6290 args = self._parse_csv(self._parse_table) 6291 return exp.JoinHint(this=func_name.upper(), expressions=args) 6292 6293 def _parse_substring(self) -> exp.Substring: 6294 # Postgres supports the form: substring(string [from int] [for int]) 6295 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6296 6297 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6298 6299 if self._match(TokenType.FROM): 6300 args.append(self._parse_bitwise()) 6301 if self._match(TokenType.FOR): 6302 if len(args) == 1: 6303 args.append(exp.Literal.number(1)) 6304 args.append(self._parse_bitwise()) 6305 6306 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6307 6308 def _parse_trim(self) -> exp.Trim: 6309 # https://www.w3resource.com/sql/character-functions/trim.php 6310 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6311 6312 position = None 6313 collation = None 6314 expression = None 6315 6316 if self._match_texts(self.TRIM_TYPES): 6317 position = self._prev.text.upper() 6318 6319 this = self._parse_bitwise() 6320 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6321 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6322 expression = self._parse_bitwise() 6323 6324 if invert_order: 6325 this, expression = expression, this 6326 6327 if self._match(TokenType.COLLATE): 6328 collation = self._parse_bitwise() 6329 6330 return self.expression( 6331 exp.Trim, this=this, position=position, expression=expression, collation=collation 6332 ) 6333 6334 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6335 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6336 6337 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6338 return self._parse_window(self._parse_id_var(), alias=True) 6339 6340 def _parse_respect_or_ignore_nulls( 6341 self, this: t.Optional[exp.Expression] 6342 ) -> t.Optional[exp.Expression]: 6343 if self._match_text_seq("IGNORE", "NULLS"): 
6344 return self.expression(exp.IgnoreNulls, this=this) 6345 if self._match_text_seq("RESPECT", "NULLS"): 6346 return self.expression(exp.RespectNulls, this=this) 6347 return this 6348 6349 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6350 if self._match(TokenType.HAVING): 6351 self._match_texts(("MAX", "MIN")) 6352 max = self._prev.text.upper() != "MIN" 6353 return self.expression( 6354 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6355 ) 6356 6357 return this 6358 6359 def _parse_window( 6360 self, this: t.Optional[exp.Expression], alias: bool = False 6361 ) -> t.Optional[exp.Expression]: 6362 func = this 6363 comments = func.comments if isinstance(func, exp.Expression) else None 6364 6365 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6366 self._match(TokenType.WHERE) 6367 this = self.expression( 6368 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6369 ) 6370 self._match_r_paren() 6371 6372 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6373 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6374 if self._match_text_seq("WITHIN", "GROUP"): 6375 order = self._parse_wrapped(self._parse_order) 6376 this = self.expression(exp.WithinGroup, this=this, expression=order) 6377 6378 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6379 # Some dialects choose to implement and some do not. 6380 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6381 6382 # There is some code above in _parse_lambda that handles 6383 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6384 6385 # The below changes handle 6386 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6387 6388 # Oracle allows both formats 6389 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6390 # and Snowflake chose to do the same for familiarity 6391 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6392 if isinstance(this, exp.AggFunc): 6393 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6394 6395 if ignore_respect and ignore_respect is not this: 6396 ignore_respect.replace(ignore_respect.this) 6397 this = self.expression(ignore_respect.__class__, this=this) 6398 6399 this = self._parse_respect_or_ignore_nulls(this) 6400 6401 # bigquery select from window x AS (partition by ...) 
6402 if alias: 6403 over = None 6404 self._match(TokenType.ALIAS) 6405 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6406 return this 6407 else: 6408 over = self._prev.text.upper() 6409 6410 if comments and isinstance(func, exp.Expression): 6411 func.pop_comments() 6412 6413 if not self._match(TokenType.L_PAREN): 6414 return self.expression( 6415 exp.Window, 6416 comments=comments, 6417 this=this, 6418 alias=self._parse_id_var(False), 6419 over=over, 6420 ) 6421 6422 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6423 6424 first = self._match(TokenType.FIRST) 6425 if self._match_text_seq("LAST"): 6426 first = False 6427 6428 partition, order = self._parse_partition_and_order() 6429 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6430 6431 if kind: 6432 self._match(TokenType.BETWEEN) 6433 start = self._parse_window_spec() 6434 self._match(TokenType.AND) 6435 end = self._parse_window_spec() 6436 6437 spec = self.expression( 6438 exp.WindowSpec, 6439 kind=kind, 6440 start=start["value"], 6441 start_side=start["side"], 6442 end=end["value"], 6443 end_side=end["side"], 6444 ) 6445 else: 6446 spec = None 6447 6448 self._match_r_paren() 6449 6450 window = self.expression( 6451 exp.Window, 6452 comments=comments, 6453 this=this, 6454 partition_by=partition, 6455 order=order, 6456 spec=spec, 6457 alias=window_alias, 6458 over=over, 6459 first=first, 6460 ) 6461 6462 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6463 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6464 return self._parse_window(window, alias=alias) 6465 6466 return window 6467 6468 def _parse_partition_and_order( 6469 self, 6470 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6471 return self._parse_partition_by(), self._parse_order() 6472 6473 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6474 self._match(TokenType.BETWEEN) 6475 6476 return { 6477 "value": ( 6478 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6479 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6480 or self._parse_bitwise() 6481 ), 6482 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6483 } 6484 6485 def _parse_alias( 6486 self, this: t.Optional[exp.Expression], explicit: bool = False 6487 ) -> t.Optional[exp.Expression]: 6488 any_token = self._match(TokenType.ALIAS) 6489 comments = self._prev_comments or [] 6490 6491 if explicit and not any_token: 6492 return this 6493 6494 if self._match(TokenType.L_PAREN): 6495 aliases = self.expression( 6496 exp.Aliases, 6497 comments=comments, 6498 this=this, 6499 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6500 ) 6501 self._match_r_paren(aliases) 6502 return aliases 6503 6504 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6505 self.STRING_ALIASES and self._parse_string_as_identifier() 6506 ) 6507 6508 if alias: 6509 comments.extend(alias.pop_comments()) 6510 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6511 column = this.this 6512 6513 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6514 if not this.comments and column and column.comments: 6515 this.comments = column.pop_comments() 6516 6517 return this 6518 6519 def _parse_id_var( 6520 self, 6521 any_token: bool = True, 6522 tokens: t.Optional[t.Collection[TokenType]] = None, 6523 ) -> t.Optional[exp.Expression]: 6524 expression = self._parse_identifier() 6525 if 
not expression and ( 6526 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6527 ): 6528 quoted = self._prev.token_type == TokenType.STRING 6529 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6530 6531 return expression 6532 6533 def _parse_string(self) -> t.Optional[exp.Expression]: 6534 if self._match_set(self.STRING_PARSERS): 6535 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6536 return self._parse_placeholder() 6537 6538 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6539 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6540 6541 def _parse_number(self) -> t.Optional[exp.Expression]: 6542 if self._match_set(self.NUMERIC_PARSERS): 6543 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6544 return self._parse_placeholder() 6545 6546 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6547 if self._match(TokenType.IDENTIFIER): 6548 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6549 return self._parse_placeholder() 6550 6551 def _parse_var( 6552 self, 6553 any_token: bool = False, 6554 tokens: t.Optional[t.Collection[TokenType]] = None, 6555 upper: bool = False, 6556 ) -> t.Optional[exp.Expression]: 6557 if ( 6558 (any_token and self._advance_any()) 6559 or self._match(TokenType.VAR) 6560 or (self._match_set(tokens) if tokens else False) 6561 ): 6562 return self.expression( 6563 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6564 ) 6565 return self._parse_placeholder() 6566 6567 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6568 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6569 self._advance() 6570 return self._prev 6571 return None 6572 6573 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6574 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6575 6576 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6577 return self._parse_primary() or self._parse_var(any_token=True) 6578 6579 def _parse_null(self) -> t.Optional[exp.Expression]: 6580 if self._match_set(self.NULL_TOKENS): 6581 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6582 return self._parse_placeholder() 6583 6584 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6585 if self._match(TokenType.TRUE): 6586 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6587 if self._match(TokenType.FALSE): 6588 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6589 return self._parse_placeholder() 6590 6591 def _parse_star(self) -> t.Optional[exp.Expression]: 6592 if self._match(TokenType.STAR): 6593 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6594 return self._parse_placeholder() 6595 6596 def _parse_parameter(self) -> exp.Parameter: 6597 this = self._parse_identifier() or self._parse_primary_or_var() 6598 return self.expression(exp.Parameter, this=this) 6599 6600 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6601 if self._match_set(self.PLACEHOLDER_PARSERS): 6602 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6603 if placeholder: 6604 return placeholder 6605 self._advance(-1) 6606 return None 6607 6608 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6609 if not self._match_texts(keywords): 6610 return None 6611 if self._match(TokenType.L_PAREN, 
advance=False): 6612 return self._parse_wrapped_csv(self._parse_expression) 6613 6614 expression = self._parse_expression() 6615 return [expression] if expression else None 6616 6617 def _parse_csv( 6618 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6619 ) -> t.List[exp.Expression]: 6620 parse_result = parse_method() 6621 items = [parse_result] if parse_result is not None else [] 6622 6623 while self._match(sep): 6624 self._add_comments(parse_result) 6625 parse_result = parse_method() 6626 if parse_result is not None: 6627 items.append(parse_result) 6628 6629 return items 6630 6631 def _parse_tokens( 6632 self, parse_method: t.Callable, expressions: t.Dict 6633 ) -> t.Optional[exp.Expression]: 6634 this = parse_method() 6635 6636 while self._match_set(expressions): 6637 this = self.expression( 6638 expressions[self._prev.token_type], 6639 this=this, 6640 comments=self._prev_comments, 6641 expression=parse_method(), 6642 ) 6643 6644 return this 6645 6646 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6647 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6648 6649 def _parse_wrapped_csv( 6650 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6651 ) -> t.List[exp.Expression]: 6652 return self._parse_wrapped( 6653 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6654 ) 6655 6656 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6657 wrapped = self._match(TokenType.L_PAREN) 6658 if not wrapped and not optional: 6659 self.raise_error("Expecting (") 6660 parse_result = parse_method() 6661 if wrapped: 6662 self._match_r_paren() 6663 return parse_result 6664 6665 def _parse_expressions(self) -> t.List[exp.Expression]: 6666 return self._parse_csv(self._parse_expression) 6667 6668 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6669 return self._parse_select() or self._parse_set_operations( 6670 self._parse_expression() if alias else self._parse_assignment() 6671 ) 6672 6673 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6674 return self._parse_query_modifiers( 6675 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6676 ) 6677 6678 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6679 this = None 6680 if self._match_texts(self.TRANSACTION_KIND): 6681 this = self._prev.text 6682 6683 self._match_texts(("TRANSACTION", "WORK")) 6684 6685 modes = [] 6686 while True: 6687 mode = [] 6688 while self._match(TokenType.VAR): 6689 mode.append(self._prev.text) 6690 6691 if mode: 6692 modes.append(" ".join(mode)) 6693 if not self._match(TokenType.COMMA): 6694 break 6695 6696 return self.expression(exp.Transaction, this=this, modes=modes) 6697 6698 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6699 chain = None 6700 savepoint = None 6701 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6702 6703 self._match_texts(("TRANSACTION", "WORK")) 6704 6705 if self._match_text_seq("TO"): 6706 self._match_text_seq("SAVEPOINT") 6707 savepoint = self._parse_id_var() 6708 6709 if self._match(TokenType.AND): 6710 chain = not self._match_text_seq("NO") 6711 self._match_text_seq("CHAIN") 6712 6713 if is_rollback: 6714 return self.expression(exp.Rollback, savepoint=savepoint) 6715 6716 return self.expression(exp.Commit, chain=chain) 6717 6718 def _parse_refresh(self) -> exp.Refresh: 6719 self._match(TokenType.TABLE) 6720 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6721 6722 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6723 if not self._match_text_seq("ADD"): 6724 return None 6725 6726 self._match(TokenType.COLUMN) 6727 exists_column = self._parse_exists(not_=True) 6728 expression = self._parse_field_def() 6729 6730 if expression: 6731 expression.set("exists", exists_column) 6732 6733 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6734 if self._match_texts(("FIRST", "AFTER")): 6735 position = self._prev.text 6736 column_position = self.expression( 6737 exp.ColumnPosition, this=self._parse_column(), position=position 6738 ) 6739 expression.set("position", column_position) 6740 6741 return expression 6742 6743 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6744 drop = self._match(TokenType.DROP) and self._parse_drop() 6745 if drop and not isinstance(drop, exp.Command): 6746 drop.set("kind", drop.args.get("kind", "COLUMN")) 6747 return drop 6748 6749 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6750 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6751 return self.expression( 6752 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6753 ) 6754 6755 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6756 index = self._index - 1 6757 6758 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6759 return self._parse_csv( 6760 lambda: self.expression( 6761 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6762 ) 6763 ) 6764 6765 self._retreat(index) 6766 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6767 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6768 6769 if self._match_text_seq("ADD", "COLUMNS"): 6770 schema = self._parse_schema() 6771 if schema: 6772 return [schema] 6773 return [] 6774 6775 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6776 6777 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6778 if self._match_texts(self.ALTER_ALTER_PARSERS): 6779 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6780 6781 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6782 # keyword after ALTER we default to parsing this statement 6783 self._match(TokenType.COLUMN) 6784 column = self._parse_field(any_token=True) 6785 6786 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6787 return self.expression(exp.AlterColumn, this=column, drop=True) 6788 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6789 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6790 if self._match(TokenType.COMMENT): 6791 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6792 if self._match_text_seq("DROP", "NOT", "NULL"): 6793 return self.expression( 6794 exp.AlterColumn, 6795 this=column, 6796 drop=True, 6797 allow_null=True, 6798 ) 6799 if self._match_text_seq("SET", "NOT", "NULL"): 6800 return self.expression( 6801 exp.AlterColumn, 6802 this=column, 6803 allow_null=False, 6804 ) 6805 self._match_text_seq("SET", "DATA") 6806 self._match_text_seq("TYPE") 6807 return self.expression( 6808 exp.AlterColumn, 6809 this=column, 6810 dtype=self._parse_types(), 6811 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6812 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6813 ) 6814 6815 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6816 if self._match_texts(("ALL", "EVEN", "AUTO")): 6817 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6818 6819 self._match_text_seq("KEY", "DISTKEY") 6820 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6821 6822 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6823 if compound: 6824 self._match_text_seq("SORTKEY") 6825 6826 if self._match(TokenType.L_PAREN, advance=False): 6827 return self.expression( 6828 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6829 ) 6830 6831 self._match_texts(("AUTO", "NONE")) 6832 return self.expression( 6833 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6834 ) 6835 6836 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6837 index = self._index - 1 6838 6839 partition_exists = self._parse_exists() 6840 if self._match(TokenType.PARTITION, advance=False): 6841 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6842 6843 self._retreat(index) 6844 return self._parse_csv(self._parse_drop_column) 6845 6846 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6847 if self._match(TokenType.COLUMN): 6848 exists = self._parse_exists() 6849 old_column = self._parse_column() 6850 to = self._match_text_seq("TO") 6851 new_column = self._parse_column() 6852 6853 if old_column is None or to is None or new_column is None: 6854 return None 6855 6856 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6857 6858 self._match_text_seq("TO") 6859 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6860 6861 def _parse_alter_table_set(self) -> exp.AlterSet: 6862 alter_set = self.expression(exp.AlterSet) 6863 6864 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6865 "TABLE", "PROPERTIES" 6866 ): 6867 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6868 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6869 alter_set.set("expressions", [self._parse_assignment()]) 6870 elif self._match_texts(("LOGGED", "UNLOGGED")): 6871 alter_set.set("option", exp.var(self._prev.text.upper())) 6872 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6873 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6874 elif self._match_text_seq("LOCATION"): 6875 alter_set.set("location", self._parse_field()) 6876 elif self._match_text_seq("ACCESS", "METHOD"): 6877 alter_set.set("access_method", self._parse_field()) 6878 elif self._match_text_seq("TABLESPACE"): 6879 alter_set.set("tablespace", self._parse_field()) 6880 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6881 alter_set.set("file_format", [self._parse_field()]) 6882 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6883 alter_set.set("file_format", self._parse_wrapped_options()) 6884 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6885 alter_set.set("copy_options", self._parse_wrapped_options()) 6886 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6887 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6888 else: 6889 if self._match_text_seq("SERDE"): 6890 alter_set.set("serde", self._parse_field()) 6891 6892 alter_set.set("expressions", [self._parse_properties()]) 6893 6894 return 
alter_set 6895 6896 def _parse_alter(self) -> exp.Alter | exp.Command: 6897 start = self._prev 6898 6899 alter_token = self._match_set(self.ALTERABLES) and self._prev 6900 if not alter_token: 6901 return self._parse_as_command(start) 6902 6903 exists = self._parse_exists() 6904 only = self._match_text_seq("ONLY") 6905 this = self._parse_table(schema=True) 6906 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6907 6908 if self._next: 6909 self._advance() 6910 6911 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6912 if parser: 6913 actions = ensure_list(parser(self)) 6914 not_valid = self._match_text_seq("NOT", "VALID") 6915 options = self._parse_csv(self._parse_property) 6916 6917 if not self._curr and actions: 6918 return self.expression( 6919 exp.Alter, 6920 this=this, 6921 kind=alter_token.text.upper(), 6922 exists=exists, 6923 actions=actions, 6924 only=only, 6925 options=options, 6926 cluster=cluster, 6927 not_valid=not_valid, 6928 ) 6929 6930 return self._parse_as_command(start) 6931 6932 def _parse_merge(self) -> exp.Merge: 6933 self._match(TokenType.INTO) 6934 target = self._parse_table() 6935 6936 if target and self._match(TokenType.ALIAS, advance=False): 6937 target.set("alias", self._parse_table_alias()) 6938 6939 self._match(TokenType.USING) 6940 using = self._parse_table() 6941 6942 self._match(TokenType.ON) 6943 on = self._parse_assignment() 6944 6945 return self.expression( 6946 exp.Merge, 6947 this=target, 6948 using=using, 6949 on=on, 6950 expressions=self._parse_when_matched(), 6951 returning=self._parse_returning(), 6952 ) 6953 6954 def _parse_when_matched(self) -> t.List[exp.When]: 6955 whens = [] 6956 6957 while self._match(TokenType.WHEN): 6958 matched = not self._match(TokenType.NOT) 6959 self._match_text_seq("MATCHED") 6960 source = ( 6961 False 6962 if self._match_text_seq("BY", "TARGET") 6963 else self._match_text_seq("BY", "SOURCE") 6964 ) 6965 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6966 6967 self._match(TokenType.THEN) 6968 6969 if self._match(TokenType.INSERT): 6970 this = self._parse_star() 6971 if this: 6972 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6973 else: 6974 then = self.expression( 6975 exp.Insert, 6976 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6977 expression=self._match_text_seq("VALUES") and self._parse_value(), 6978 ) 6979 elif self._match(TokenType.UPDATE): 6980 expressions = self._parse_star() 6981 if expressions: 6982 then = self.expression(exp.Update, expressions=expressions) 6983 else: 6984 then = self.expression( 6985 exp.Update, 6986 expressions=self._match(TokenType.SET) 6987 and self._parse_csv(self._parse_equality), 6988 ) 6989 elif self._match(TokenType.DELETE): 6990 then = self.expression(exp.Var, this=self._prev.text) 6991 else: 6992 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6993 6994 whens.append( 6995 self.expression( 6996 exp.When, 6997 matched=matched, 6998 source=source, 6999 condition=condition, 7000 then=then, 7001 ) 7002 ) 7003 return whens 7004 7005 def _parse_show(self) -> t.Optional[exp.Expression]: 7006 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7007 if parser: 7008 return parser(self) 7009 return self._parse_as_command(self._prev) 7010 7011 def _parse_set_item_assignment( 7012 self, kind: t.Optional[str] = None 7013 ) -> t.Optional[exp.Expression]: 7014 index = self._index 7015 7016 if kind in ("GLOBAL", "SESSION") and 
self._match_text_seq("TRANSACTION"): 7017 return self._parse_set_transaction(global_=kind == "GLOBAL") 7018 7019 left = self._parse_primary() or self._parse_column() 7020 assignment_delimiter = self._match_texts(("=", "TO")) 7021 7022 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7023 self._retreat(index) 7024 return None 7025 7026 right = self._parse_statement() or self._parse_id_var() 7027 if isinstance(right, (exp.Column, exp.Identifier)): 7028 right = exp.var(right.name) 7029 7030 this = self.expression(exp.EQ, this=left, expression=right) 7031 return self.expression(exp.SetItem, this=this, kind=kind) 7032 7033 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7034 self._match_text_seq("TRANSACTION") 7035 characteristics = self._parse_csv( 7036 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7037 ) 7038 return self.expression( 7039 exp.SetItem, 7040 expressions=characteristics, 7041 kind="TRANSACTION", 7042 **{"global": global_}, # type: ignore 7043 ) 7044 7045 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7046 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7047 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7048 7049 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7050 index = self._index 7051 set_ = self.expression( 7052 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7053 ) 7054 7055 if self._curr: 7056 self._retreat(index) 7057 return self._parse_as_command(self._prev) 7058 7059 return set_ 7060 7061 def _parse_var_from_options( 7062 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7063 ) -> t.Optional[exp.Var]: 7064 start = self._curr 7065 if not start: 7066 return None 7067 7068 option = start.text.upper() 7069 continuations = options.get(option) 7070 7071 index = self._index 7072 self._advance() 7073 for keywords in continuations or []: 7074 if isinstance(keywords, str): 7075 keywords = (keywords,) 7076 7077 if self._match_text_seq(*keywords): 7078 option = f"{option} {' '.join(keywords)}" 7079 break 7080 else: 7081 if continuations or continuations is None: 7082 if raise_unmatched: 7083 self.raise_error(f"Unknown option {option}") 7084 7085 self._retreat(index) 7086 return None 7087 7088 return exp.var(option) 7089 7090 def _parse_as_command(self, start: Token) -> exp.Command: 7091 while self._curr: 7092 self._advance() 7093 text = self._find_sql(start, self._prev) 7094 size = len(start.text) 7095 self._warn_unsupported() 7096 return exp.Command(this=text[:size], expression=text[size:]) 7097 7098 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7099 settings = [] 7100 7101 self._match_l_paren() 7102 kind = self._parse_id_var() 7103 7104 if self._match(TokenType.L_PAREN): 7105 while True: 7106 key = self._parse_id_var() 7107 value = self._parse_primary() 7108 7109 if not key and value is None: 7110 break 7111 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7112 self._match(TokenType.R_PAREN) 7113 7114 self._match_r_paren() 7115 7116 return self.expression( 7117 exp.DictProperty, 7118 this=this, 7119 kind=kind.this if kind else None, 7120 settings=settings, 7121 ) 7122 7123 def _parse_dict_range(self, this: str) -> exp.DictRange: 7124 self._match_l_paren() 7125 has_min = self._match_text_seq("MIN") 7126 if has_min: 7127 min = self._parse_var() or self._parse_primary() 7128 self._match_text_seq("MAX") 7129 max = 
self._parse_var() or self._parse_primary() 7130 else: 7131 max = self._parse_var() or self._parse_primary() 7132 min = exp.Literal.number(0) 7133 self._match_r_paren() 7134 return self.expression(exp.DictRange, this=this, min=min, max=max) 7135 7136 def _parse_comprehension( 7137 self, this: t.Optional[exp.Expression] 7138 ) -> t.Optional[exp.Comprehension]: 7139 index = self._index 7140 expression = self._parse_column() 7141 if not self._match(TokenType.IN): 7142 self._retreat(index - 1) 7143 return None 7144 iterator = self._parse_column() 7145 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7146 return self.expression( 7147 exp.Comprehension, 7148 this=this, 7149 expression=expression, 7150 iterator=iterator, 7151 condition=condition, 7152 ) 7153 7154 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7155 if self._match(TokenType.HEREDOC_STRING): 7156 return self.expression(exp.Heredoc, this=self._prev.text) 7157 7158 if not self._match_text_seq("$"): 7159 return None 7160 7161 tags = ["$"] 7162 tag_text = None 7163 7164 if self._is_connected(): 7165 self._advance() 7166 tags.append(self._prev.text.upper()) 7167 else: 7168 self.raise_error("No closing $ found") 7169 7170 if tags[-1] != "$": 7171 if self._is_connected() and self._match_text_seq("$"): 7172 tag_text = tags[-1] 7173 tags.append("$") 7174 else: 7175 self.raise_error("No closing $ found") 7176 7177 heredoc_start = self._curr 7178 7179 while self._curr: 7180 if self._match_text_seq(*tags, advance=False): 7181 this = self._find_sql(heredoc_start, self._prev) 7182 self._advance(len(tags)) 7183 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7184 7185 self._advance() 7186 7187 self.raise_error(f"No closing {''.join(tags)} found") 7188 return None 7189 7190 def _find_parser( 7191 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7192 ) -> t.Optional[t.Callable]: 7193 if not self._curr: 7194 return None 7195 7196 index = self._index 7197 this = [] 7198 while True: 7199 # The current token might be multiple words 7200 curr = self._curr.text.upper() 7201 key = curr.split(" ") 7202 this.append(curr) 7203 7204 self._advance() 7205 result, trie = in_trie(trie, key) 7206 if result == TrieResult.FAILED: 7207 break 7208 7209 if result == TrieResult.EXISTS: 7210 subparser = parsers[" ".join(this)] 7211 return subparser 7212 7213 self._retreat(index) 7214 return None 7215 7216 def _match(self, token_type, advance=True, expression=None): 7217 if not self._curr: 7218 return None 7219 7220 if self._curr.token_type == token_type: 7221 if advance: 7222 self._advance() 7223 self._add_comments(expression) 7224 return True 7225 7226 return None 7227 7228 def _match_set(self, types, advance=True): 7229 if not self._curr: 7230 return None 7231 7232 if self._curr.token_type in types: 7233 if advance: 7234 self._advance() 7235 return True 7236 7237 return None 7238 7239 def _match_pair(self, token_type_a, token_type_b, advance=True): 7240 if not self._curr or not self._next: 7241 return None 7242 7243 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7244 if advance: 7245 self._advance(2) 7246 return True 7247 7248 return None 7249 7250 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7251 if not self._match(TokenType.L_PAREN, expression=expression): 7252 self.raise_error("Expecting (") 7253 7254 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7255 if not self._match(TokenType.R_PAREN, expression=expression): 
7256 self.raise_error("Expecting )") 7257 7258 def _match_texts(self, texts, advance=True): 7259 if ( 7260 self._curr 7261 and self._curr.token_type != TokenType.STRING 7262 and self._curr.text.upper() in texts 7263 ): 7264 if advance: 7265 self._advance() 7266 return True 7267 return None 7268 7269 def _match_text_seq(self, *texts, advance=True): 7270 index = self._index 7271 for text in texts: 7272 if ( 7273 self._curr 7274 and self._curr.token_type != TokenType.STRING 7275 and self._curr.text.upper() == text 7276 ): 7277 self._advance() 7278 else: 7279 self._retreat(index) 7280 return None 7281 7282 if not advance: 7283 self._retreat(index) 7284 7285 return True 7286 7287 def _replace_lambda( 7288 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7289 ) -> t.Optional[exp.Expression]: 7290 if not node: 7291 return node 7292 7293 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7294 7295 for column in node.find_all(exp.Column): 7296 typ = lambda_types.get(column.parts[0].name) 7297 if typ is not None: 7298 dot_or_id = column.to_dot() if column.table else column.this 7299 7300 if typ: 7301 dot_or_id = self.expression( 7302 exp.Cast, 7303 this=dot_or_id, 7304 to=typ, 7305 ) 7306 7307 parent = column.parent 7308 7309 while isinstance(parent, exp.Dot): 7310 if not isinstance(parent.parent, exp.Dot): 7311 parent.replace(dot_or_id) 7312 break 7313 parent = parent.parent 7314 else: 7315 if column is node: 7316 node = dot_or_id 7317 else: 7318 column.replace(dot_or_id) 7319 return node 7320 7321 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7322 start = self._prev 7323 7324 # Not to be confused with TRUNCATE(number, decimals) function call 7325 if self._match(TokenType.L_PAREN): 7326 self._retreat(self._index - 2) 7327 return self._parse_function() 7328 7329 # Clickhouse supports TRUNCATE DATABASE as well 7330 is_database = self._match(TokenType.DATABASE) 7331 7332 self._match(TokenType.TABLE) 7333 7334 exists = self._parse_exists(not_=False) 7335 7336 expressions = self._parse_csv( 7337 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7338 ) 7339 7340 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7341 7342 if self._match_text_seq("RESTART", "IDENTITY"): 7343 identity = "RESTART" 7344 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7345 identity = "CONTINUE" 7346 else: 7347 identity = None 7348 7349 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7350 option = self._prev.text 7351 else: 7352 option = None 7353 7354 partition = self._parse_partition() 7355 7356 # Fallback case 7357 if self._curr: 7358 return self._parse_as_command(start) 7359 7360 return self.expression( 7361 exp.TruncateTable, 7362 expressions=expressions, 7363 is_database=is_database, 7364 exists=exists, 7365 cluster=cluster, 7366 identity=identity, 7367 option=option, 7368 partition=partition, 7369 ) 7370 7371 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7372 this = self._parse_ordered(self._parse_opclass) 7373 7374 if not self._match(TokenType.WITH): 7375 return this 7376 7377 op = self._parse_var(any_token=True) 7378 7379 return self.expression(exp.WithOperator, this=this, op=op) 7380 7381 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7382 self._match(TokenType.EQ) 7383 self._match(TokenType.L_PAREN) 7384 7385 opts: t.List[t.Optional[exp.Expression]] = [] 7386 while self._curr and not self._match(TokenType.R_PAREN): 7387 if 
self._match_text_seq("FORMAT_NAME", "="): 7388 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7389 # so we parse it separately to use _parse_field() 7390 prop = self.expression( 7391 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7392 ) 7393 opts.append(prop) 7394 else: 7395 opts.append(self._parse_property()) 7396 7397 self._match(TokenType.COMMA) 7398 7399 return opts 7400 7401 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7402 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7403 7404 options = [] 7405 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7406 option = self._parse_var(any_token=True) 7407 prev = self._prev.text.upper() 7408 7409 # Different dialects might separate options and values by white space, "=" and "AS" 7410 self._match(TokenType.EQ) 7411 self._match(TokenType.ALIAS) 7412 7413 param = self.expression(exp.CopyParameter, this=option) 7414 7415 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7416 TokenType.L_PAREN, advance=False 7417 ): 7418 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7419 param.set("expressions", self._parse_wrapped_options()) 7420 elif prev == "FILE_FORMAT": 7421 # T-SQL's external file format case 7422 param.set("expression", self._parse_field()) 7423 else: 7424 param.set("expression", self._parse_unquoted_field()) 7425 7426 options.append(param) 7427 self._match(sep) 7428 7429 return options 7430 7431 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7432 expr = self.expression(exp.Credentials) 7433 7434 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7435 expr.set("storage", self._parse_field()) 7436 if self._match_text_seq("CREDENTIALS"): 7437 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7438 creds = ( 7439 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7440 ) 7441 expr.set("credentials", creds) 7442 if self._match_text_seq("ENCRYPTION"): 7443 expr.set("encryption", self._parse_wrapped_options()) 7444 if self._match_text_seq("IAM_ROLE"): 7445 expr.set("iam_role", self._parse_field()) 7446 if self._match_text_seq("REGION"): 7447 expr.set("region", self._parse_field()) 7448 7449 return expr 7450 7451 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7452 return self._parse_field() 7453 7454 def _parse_copy(self) -> exp.Copy | exp.Command: 7455 start = self._prev 7456 7457 self._match(TokenType.INTO) 7458 7459 this = ( 7460 self._parse_select(nested=True, parse_subquery_alias=False) 7461 if self._match(TokenType.L_PAREN, advance=False) 7462 else self._parse_table(schema=True) 7463 ) 7464 7465 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7466 7467 files = self._parse_csv(self._parse_file_location) 7468 credentials = self._parse_credentials() 7469 7470 self._match_text_seq("WITH") 7471 7472 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7473 7474 # Fallback case 7475 if self._curr: 7476 return self._parse_as_command(start) 7477 7478 return self.expression( 7479 exp.Copy, 7480 this=this, 7481 kind=kind, 7482 credentials=credentials, 7483 files=files, 7484 params=params, 7485 ) 7486 7487 def _parse_normalize(self) -> exp.Normalize: 7488 return self.expression( 7489 exp.Normalize, 7490 this=self._parse_bitwise(), 7491 form=self._match(TokenType.COMMA) and self._parse_var(), 7492 ) 7493 7494 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7495 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7496 this = self._parse_function() 7497 if isinstance(this, exp.Columns): 7498 this.set("unpack", True) 7499 return this 7500 7501 return self.expression( 7502 exp.Star, 7503 **{ # type: ignore 7504 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7505 "replace": self._parse_star_op("REPLACE"), 7506 "rename": self._parse_star_op("RENAME"), 7507 }, 7508 ) 7509 7510 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7511 privilege_parts = [] 7512 7513 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7514 # (end of privilege list) or L_PAREN (start of column list) are met 7515 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7516 privilege_parts.append(self._curr.text.upper()) 7517 self._advance() 7518 7519 this = exp.var(" ".join(privilege_parts)) 7520 expressions = ( 7521 self._parse_wrapped_csv(self._parse_column) 7522 if self._match(TokenType.L_PAREN, advance=False) 7523 else None 7524 ) 7525 7526 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7527 7528 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7529 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7530 principal = self._parse_id_var() 7531 7532 if not principal: 7533 return None 7534 7535 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7536 7537 def _parse_grant(self) -> exp.Grant | exp.Command: 7538 start = self._prev 7539 7540 privileges = self._parse_csv(self._parse_grant_privilege) 7541 7542 self._match(TokenType.ON) 7543 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7544 7545 # Attempt to parse the securable e.g. MySQL allows names 7546 # such as "foo.*", "*.*" which are not easily parseable yet 7547 securable = self._try_parse(self._parse_table_parts) 7548 7549 if not securable or not self._match_text_seq("TO"): 7550 return self._parse_as_command(start) 7551 7552 principals = self._parse_csv(self._parse_grant_principal) 7553 7554 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7555 7556 if self._curr: 7557 return self._parse_as_command(start) 7558 7559 return self.expression( 7560 exp.Grant, 7561 privileges=privileges, 7562 kind=kind, 7563 securable=securable, 7564 principals=principals, 7565 grant_option=grant_option, 7566 ) 7567 7568 def _parse_overlay(self) -> exp.Overlay: 7569 return self.expression( 7570 exp.Overlay, 7571 **{ # type: ignore 7572 "this": self._parse_bitwise(), 7573 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7574 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7575 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7576 }, 7577 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1379 def __init__( 1380 self, 1381 error_level: t.Optional[ErrorLevel] = None, 1382 error_message_context: int = 100, 1383 max_errors: int = 3, 1384 dialect: DialectType = None, 1385 ): 1386 from sqlglot.dialects import Dialect 1387 1388 self.error_level = error_level or ErrorLevel.IMMEDIATE 1389 self.error_message_context = error_message_context 1390 self.max_errors = max_errors 1391 self.dialect = Dialect.get_or_raise(dialect) 1392 self.reset()
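A minimal sketch of driving the constructor directly with the base Tokenizer; in normal use the higher-level sqlglot.parse / sqlglot.parse_one helpers do this wiring:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

# Collect errors and raise them together instead of failing on the
# first one (ErrorLevel.IMMEDIATE is the default behavior).
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=3)
sql = "SELECT 1 AS x"
tree = parser.parse(Tokenizer().tokenize(sql), sql=sql)[0]
print(tree.sql())  # SELECT 1 AS x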
1404 def parse( 1405 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1406 ) -> t.List[t.Optional[exp.Expression]]: 1407 """ 1408 Parses a list of tokens and returns a list of syntax trees, one tree 1409 per parsed SQL statement. 1410 1411 Args: 1412 raw_tokens: The list of tokens. 1413 sql: The original SQL string, used to produce helpful debug messages. 1414 1415 Returns: 1416 The list of the produced syntax trees. 1417 """ 1418 return self._parse( 1419 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1420 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
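Statements are split on semicolons during parsing, so a multi-statement string yields one tree per statement (sketch):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
print(len(trees))  # 2 -- one syntax tree per statement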
1422 def parse_into( 1423 self, 1424 expression_types: exp.IntoType, 1425 raw_tokens: t.List[Token], 1426 sql: t.Optional[str] = None, 1427 ) -> t.List[t.Optional[exp.Expression]]: 1428 """ 1429 Parses a list of tokens into a given Expression type. If a collection of Expression 1430 types is given instead, this method will try to parse the token list into each one 1431 of them, stopping at the first for which the parsing succeeds. 1432 1433 Args: 1434 expression_types: The expression type(s) to try and parse the token list into. 1435 raw_tokens: The list of tokens. 1436 sql: The original SQL string, used to produce helpful debug messages. 1437 1438 Returns: 1439 The target Expression. 1440 """ 1441 errors = [] 1442 for expression_type in ensure_list(expression_types): 1443 parser = self.EXPRESSION_PARSERS.get(expression_type) 1444 if not parser: 1445 raise TypeError(f"No parser registered for {expression_type}") 1446 1447 try: 1448 return self._parse(parser, raw_tokens, sql) 1449 except ParseError as e: 1450 e.errors[0]["into_expression"] = expression_type 1451 errors.append(e) 1452 1453 raise ParseError( 1454 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1455 errors=merge_errors(errors), 1456 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
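A sketch of parsing directly into a target type; exp.Select is registered in EXPRESSION_PARSERS for the base parser, so this succeeds rather than raising the aggregated ParseError:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
# A collection of types would be attempted in order until one parses.
select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)[0]
print(type(select).__name__)  # Select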
1496 def check_errors(self) -> None: 1497 """Logs or raises any found errors, depending on the chosen error level setting.""" 1498 if self.error_level == ErrorLevel.WARN: 1499 for error in self.errors: 1500 logger.error(str(error)) 1501 elif self.error_level == ErrorLevel.RAISE and self.errors: 1502 raise ParseError( 1503 concat_messages(self.errors, self.max_errors), 1504 errors=merge_errors(self.errors), 1505 )
Logs or raises any found errors, depending on the chosen error level setting.
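Under ErrorLevel.WARN the recorded errors are logged and parsing continues, leaving them inspectable on the instance (sketch; the input is assumed to be invalid in the base dialect):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

parser = Parser(error_level=ErrorLevel.WARN)
sql = "SELECT * FROM"  # incomplete statement
parser.parse(Tokenizer().tokenize(sql), sql=sql)
print(len(parser.errors))  # errors were logged, not raised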
1507 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1508 """ 1509 Appends an error in the list of recorded errors or raises it, depending on the chosen 1510 error level setting. 1511 """ 1512 token = token or self._curr or self._prev or Token.string("") 1513 start = token.start 1514 end = token.end + 1 1515 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1516 highlight = self.sql[start:end] 1517 end_context = self.sql[end : end + self.error_message_context] 1518 1519 error = ParseError.new( 1520 f"{message}. Line {token.line}, Col: {token.col}.\n" 1521 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1522 description=message, 1523 line=token.line, 1524 col=token.col, 1525 start_context=start_context, 1526 highlight=highlight, 1527 end_context=end_context, 1528 ) 1529 1530 if self.error_level == ErrorLevel.IMMEDIATE: 1531 raise error 1532 1533 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
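Each recorded error also carries the structured context fields built above, and those survive on the raised ParseError (sketch):

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

try:
    sql = "SELECT * FROM"  # assumed invalid: dangling FROM
    Parser().parse(Tokenizer().tokenize(sql), sql=sql)  # IMMEDIATE raises at once
except ParseError as e:
    err = e.errors[0]
    print(err["line"], err["col"], err["description"])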
1535 def expression( 1536 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1537 ) -> E: 1538 """ 1539 Creates a new, validated Expression. 1540 1541 Args: 1542 exp_class: The expression class to instantiate. 1543 comments: An optional list of comments to attach to the expression. 1544 kwargs: The arguments to set for the expression along with their respective values. 1545 1546 Returns: 1547 The target expression. 1548 """ 1549 instance = exp_class(**kwargs) 1550 instance.add_comments(comments) if comments else self._add_comments(instance) 1551 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
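Helper methods build nodes through expression rather than instantiating them directly, so comment attachment and validation happen in one place. A minimal sketch:

from sqlglot import exp
from sqlglot.parser import Parser

node = Parser().expression(exp.Alias, this=exp.column("a"), alias=exp.to_identifier("b"))
print(node.sql())  # a AS b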
1558 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1559 """ 1560 Validates an Expression, making sure that all its mandatory arguments are set. 1561 1562 Args: 1563 expression: The expression to validate. 1564 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1565 1566 Returns: 1567 The validated expression. 1568 """ 1569 if self.error_level != ErrorLevel.IGNORE: 1570 for error_message in expression.error_messages(args): 1571 self.raise_error(error_message) 1572 1573 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
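Validation is what turns a missing mandatory argument into a recorded error; under ErrorLevel.IGNORE the same node passes through unchecked (sketch, relying on exp.Alias declaring "this" as required):

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser

try:
    Parser().expression(exp.Alias)  # default IMMEDIATE level raises
except ParseError as e:
    print(e.errors[0]["description"])

node = Parser(error_level=ErrorLevel.IGNORE).expression(exp.Alias)
print(repr(node))  # the unvalidated node is returned as-is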