# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], reverse_args: bool = False 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 def _parse_binary_range( 47 self: Parser, this: t.Optional[exp.Expression] 48 ) -> t.Optional[exp.Expression]: 49 expression = self._parse_bitwise() 50 if reverse_args: 51 this, expression = expression, this 52 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 53 54 return _parse_binary_range 55 56 57def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 58 # Default argument order is base, expression 59 this = seq_get(args, 0) 60 expression = seq_get(args, 1) 61 
62 if expression: 63 if not dialect.LOG_BASE_FIRST: 64 this, expression = expression, this 65 return exp.Log(this=this, expression=expression) 66 67 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 68 69 70def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 73 74 75def build_lower(args: t.List) -> exp.Lower | exp.Hex: 76 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 79 80 81def build_upper(args: t.List) -> exp.Upper | exp.Hex: 82 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 85 86 87def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 88 def _builder(args: t.List, dialect: Dialect) -> E: 89 expression = expr_type( 90 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 91 ) 92 if len(args) > 2 and expr_type is exp.JSONExtract: 93 expression.set("expressions", args[2:]) 94 95 return expression 96 97 return _builder 98 99 100def build_mod(args: t.List) -> exp.Mod: 101 this = seq_get(args, 0) 102 expression = seq_get(args, 1) 103 104 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 105 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 106 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 107 108 return exp.Mod(this=this, expression=expression) 109 110 111def build_pad(args: t.List, is_left: bool = True): 112 return exp.Pad( 113 this=seq_get(args, 0), 114 expression=seq_get(args, 1), 115 fill_pattern=seq_get(args, 2), 116 is_left=is_left, 117 ) 118 119 120def build_array_constructor( 121 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 122) -> exp.Expression: 123 array_exp = exp_class(expressions=args) 124 125 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 126 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 127 128 return array_exp 129 130 131def build_convert_timezone( 132 args: t.List, default_source_tz: t.Optional[str] = None 133) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 134 if len(args) == 2: 135 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 136 return exp.ConvertTimezone( 137 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 138 ) 139 140 return exp.ConvertTimezone.from_arg_list(args) 141 142 143def build_trim(args: t.List, is_left: bool = True): 144 return exp.Trim( 145 this=seq_get(args, 0), 146 expression=seq_get(args, 1), 147 position="LEADING" if is_left else "TRAILING", 148 ) 149 150 151def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce: 152 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl) 153 154 155class _Parser(type): 156 def __new__(cls, clsname, bases, attrs): 157 klass = super().__new__(cls, clsname, bases, attrs) 158 159 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 160 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 161 162 return klass 163 164 165class Parser(metaclass=_Parser): 
166 """ 167 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 168 169 Args: 170 error_level: The desired error level. 171 Default: ErrorLevel.IMMEDIATE 172 error_message_context: The amount of context to capture from a query string when displaying 173 the error message (in number of characters). 174 Default: 100 175 max_errors: Maximum number of error messages to include in a raised ParseError. 176 This is only relevant if error_level is ErrorLevel.RAISE. 177 Default: 3 178 """ 179 180 FUNCTIONS: t.Dict[str, t.Callable] = { 181 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 182 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 183 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 184 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 185 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 186 ), 187 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 188 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 189 ), 190 "CHAR": lambda args: exp.Chr(expressions=args), 191 "CHR": lambda args: exp.Chr(expressions=args), 192 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 193 "CONCAT": lambda args, dialect: exp.Concat( 194 expressions=args, 195 safe=not dialect.STRICT_STRING_CONCAT, 196 coalesce=dialect.CONCAT_COALESCE, 197 ), 198 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 199 expressions=args, 200 safe=not dialect.STRICT_STRING_CONCAT, 201 coalesce=dialect.CONCAT_COALESCE, 202 ), 203 "CONVERT_TIMEZONE": build_convert_timezone, 204 "DATE_TO_DATE_STR": lambda args: exp.Cast( 205 this=seq_get(args, 0), 206 to=exp.DataType(this=exp.DataType.Type.TEXT), 207 ), 208 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 209 start=seq_get(args, 0), 210 end=seq_get(args, 1), 211 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 212 
), 213 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 214 "HEX": build_hex, 215 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 216 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 217 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 218 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 219 "LIKE": build_like, 220 "LOG": build_logarithm, 221 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 222 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 223 "LOWER": build_lower, 224 "LPAD": lambda args: build_pad(args), 225 "LEFTPAD": lambda args: build_pad(args), 226 "LTRIM": lambda args: build_trim(args), 227 "MOD": build_mod, 228 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 229 "RPAD": lambda args: build_pad(args, is_left=False), 230 "RTRIM": lambda args: build_trim(args, is_left=False), 231 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 232 if len(args) != 2 233 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 234 "TIME_TO_TIME_STR": lambda args: exp.Cast( 235 this=seq_get(args, 0), 236 to=exp.DataType(this=exp.DataType.Type.TEXT), 237 ), 238 "TO_HEX": build_hex, 239 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 240 this=exp.Cast( 241 this=seq_get(args, 0), 242 to=exp.DataType(this=exp.DataType.Type.TEXT), 243 ), 244 start=exp.Literal.number(1), 245 length=exp.Literal.number(10), 246 ), 247 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 248 "UPPER": build_upper, 249 "VAR_MAP": build_var_map, 250 } 251 252 NO_PAREN_FUNCTIONS = { 253 TokenType.CURRENT_DATE: exp.CurrentDate, 254 TokenType.CURRENT_DATETIME: exp.CurrentDate, 255 TokenType.CURRENT_TIME: exp.CurrentTime, 256 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 257 
TokenType.CURRENT_USER: exp.CurrentUser, 258 } 259 260 STRUCT_TYPE_TOKENS = { 261 TokenType.NESTED, 262 TokenType.OBJECT, 263 TokenType.STRUCT, 264 TokenType.UNION, 265 } 266 267 NESTED_TYPE_TOKENS = { 268 TokenType.ARRAY, 269 TokenType.LIST, 270 TokenType.LOWCARDINALITY, 271 TokenType.MAP, 272 TokenType.NULLABLE, 273 TokenType.RANGE, 274 *STRUCT_TYPE_TOKENS, 275 } 276 277 ENUM_TYPE_TOKENS = { 278 TokenType.ENUM, 279 TokenType.ENUM8, 280 TokenType.ENUM16, 281 } 282 283 AGGREGATE_TYPE_TOKENS = { 284 TokenType.AGGREGATEFUNCTION, 285 TokenType.SIMPLEAGGREGATEFUNCTION, 286 } 287 288 TYPE_TOKENS = { 289 TokenType.BIT, 290 TokenType.BOOLEAN, 291 TokenType.TINYINT, 292 TokenType.UTINYINT, 293 TokenType.SMALLINT, 294 TokenType.USMALLINT, 295 TokenType.INT, 296 TokenType.UINT, 297 TokenType.BIGINT, 298 TokenType.UBIGINT, 299 TokenType.INT128, 300 TokenType.UINT128, 301 TokenType.INT256, 302 TokenType.UINT256, 303 TokenType.MEDIUMINT, 304 TokenType.UMEDIUMINT, 305 TokenType.FIXEDSTRING, 306 TokenType.FLOAT, 307 TokenType.DOUBLE, 308 TokenType.CHAR, 309 TokenType.NCHAR, 310 TokenType.VARCHAR, 311 TokenType.NVARCHAR, 312 TokenType.BPCHAR, 313 TokenType.TEXT, 314 TokenType.MEDIUMTEXT, 315 TokenType.LONGTEXT, 316 TokenType.MEDIUMBLOB, 317 TokenType.LONGBLOB, 318 TokenType.BINARY, 319 TokenType.VARBINARY, 320 TokenType.JSON, 321 TokenType.JSONB, 322 TokenType.INTERVAL, 323 TokenType.TINYBLOB, 324 TokenType.TINYTEXT, 325 TokenType.TIME, 326 TokenType.TIMETZ, 327 TokenType.TIMESTAMP, 328 TokenType.TIMESTAMP_S, 329 TokenType.TIMESTAMP_MS, 330 TokenType.TIMESTAMP_NS, 331 TokenType.TIMESTAMPTZ, 332 TokenType.TIMESTAMPLTZ, 333 TokenType.TIMESTAMPNTZ, 334 TokenType.DATETIME, 335 TokenType.DATETIME64, 336 TokenType.DATE, 337 TokenType.DATE32, 338 TokenType.INT4RANGE, 339 TokenType.INT4MULTIRANGE, 340 TokenType.INT8RANGE, 341 TokenType.INT8MULTIRANGE, 342 TokenType.NUMRANGE, 343 TokenType.NUMMULTIRANGE, 344 TokenType.TSRANGE, 345 TokenType.TSMULTIRANGE, 346 TokenType.TSTZRANGE, 347 
TokenType.TSTZMULTIRANGE, 348 TokenType.DATERANGE, 349 TokenType.DATEMULTIRANGE, 350 TokenType.DECIMAL, 351 TokenType.DECIMAL32, 352 TokenType.DECIMAL64, 353 TokenType.DECIMAL128, 354 TokenType.UDECIMAL, 355 TokenType.BIGDECIMAL, 356 TokenType.UUID, 357 TokenType.GEOGRAPHY, 358 TokenType.GEOMETRY, 359 TokenType.HLLSKETCH, 360 TokenType.HSTORE, 361 TokenType.PSEUDO_TYPE, 362 TokenType.SUPER, 363 TokenType.SERIAL, 364 TokenType.SMALLSERIAL, 365 TokenType.BIGSERIAL, 366 TokenType.XML, 367 TokenType.YEAR, 368 TokenType.UNIQUEIDENTIFIER, 369 TokenType.USERDEFINED, 370 TokenType.MONEY, 371 TokenType.SMALLMONEY, 372 TokenType.ROWVERSION, 373 TokenType.IMAGE, 374 TokenType.VARIANT, 375 TokenType.VECTOR, 376 TokenType.OBJECT, 377 TokenType.OBJECT_IDENTIFIER, 378 TokenType.INET, 379 TokenType.IPADDRESS, 380 TokenType.IPPREFIX, 381 TokenType.IPV4, 382 TokenType.IPV6, 383 TokenType.UNKNOWN, 384 TokenType.NULL, 385 TokenType.NAME, 386 TokenType.TDIGEST, 387 *ENUM_TYPE_TOKENS, 388 *NESTED_TYPE_TOKENS, 389 *AGGREGATE_TYPE_TOKENS, 390 } 391 392 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 393 TokenType.BIGINT: TokenType.UBIGINT, 394 TokenType.INT: TokenType.UINT, 395 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 396 TokenType.SMALLINT: TokenType.USMALLINT, 397 TokenType.TINYINT: TokenType.UTINYINT, 398 TokenType.DECIMAL: TokenType.UDECIMAL, 399 } 400 401 SUBQUERY_PREDICATES = { 402 TokenType.ANY: exp.Any, 403 TokenType.ALL: exp.All, 404 TokenType.EXISTS: exp.Exists, 405 TokenType.SOME: exp.Any, 406 } 407 408 RESERVED_TOKENS = { 409 *Tokenizer.SINGLE_TOKENS.values(), 410 TokenType.SELECT, 411 } - {TokenType.IDENTIFIER} 412 413 DB_CREATABLES = { 414 TokenType.DATABASE, 415 TokenType.DICTIONARY, 416 TokenType.MODEL, 417 TokenType.SCHEMA, 418 TokenType.SEQUENCE, 419 TokenType.STORAGE_INTEGRATION, 420 TokenType.TABLE, 421 TokenType.TAG, 422 TokenType.VIEW, 423 TokenType.WAREHOUSE, 424 TokenType.STREAMLIT, 425 } 426 427 CREATABLES = { 428 TokenType.COLUMN, 429 TokenType.CONSTRAINT, 430 
TokenType.FOREIGN_KEY, 431 TokenType.FUNCTION, 432 TokenType.INDEX, 433 TokenType.PROCEDURE, 434 *DB_CREATABLES, 435 } 436 437 ALTERABLES = { 438 TokenType.INDEX, 439 TokenType.TABLE, 440 TokenType.VIEW, 441 } 442 443 # Tokens that can represent identifiers 444 ID_VAR_TOKENS = { 445 TokenType.ALL, 446 TokenType.VAR, 447 TokenType.ANTI, 448 TokenType.APPLY, 449 TokenType.ASC, 450 TokenType.ASOF, 451 TokenType.AUTO_INCREMENT, 452 TokenType.BEGIN, 453 TokenType.BPCHAR, 454 TokenType.CACHE, 455 TokenType.CASE, 456 TokenType.COLLATE, 457 TokenType.COMMAND, 458 TokenType.COMMENT, 459 TokenType.COMMIT, 460 TokenType.CONSTRAINT, 461 TokenType.COPY, 462 TokenType.CUBE, 463 TokenType.DEFAULT, 464 TokenType.DELETE, 465 TokenType.DESC, 466 TokenType.DESCRIBE, 467 TokenType.DICTIONARY, 468 TokenType.DIV, 469 TokenType.END, 470 TokenType.EXECUTE, 471 TokenType.ESCAPE, 472 TokenType.FALSE, 473 TokenType.FIRST, 474 TokenType.FILTER, 475 TokenType.FINAL, 476 TokenType.FORMAT, 477 TokenType.FULL, 478 TokenType.IDENTIFIER, 479 TokenType.IS, 480 TokenType.ISNULL, 481 TokenType.INTERVAL, 482 TokenType.KEEP, 483 TokenType.KILL, 484 TokenType.LEFT, 485 TokenType.LOAD, 486 TokenType.MERGE, 487 TokenType.NATURAL, 488 TokenType.NEXT, 489 TokenType.OFFSET, 490 TokenType.OPERATOR, 491 TokenType.ORDINALITY, 492 TokenType.OVERLAPS, 493 TokenType.OVERWRITE, 494 TokenType.PARTITION, 495 TokenType.PERCENT, 496 TokenType.PIVOT, 497 TokenType.PRAGMA, 498 TokenType.RANGE, 499 TokenType.RECURSIVE, 500 TokenType.REFERENCES, 501 TokenType.REFRESH, 502 TokenType.RENAME, 503 TokenType.REPLACE, 504 TokenType.RIGHT, 505 TokenType.ROLLUP, 506 TokenType.ROW, 507 TokenType.ROWS, 508 TokenType.SEMI, 509 TokenType.SET, 510 TokenType.SETTINGS, 511 TokenType.SHOW, 512 TokenType.TEMPORARY, 513 TokenType.TOP, 514 TokenType.TRUE, 515 TokenType.TRUNCATE, 516 TokenType.UNIQUE, 517 TokenType.UNNEST, 518 TokenType.UNPIVOT, 519 TokenType.UPDATE, 520 TokenType.USE, 521 TokenType.VOLATILE, 522 TokenType.WINDOW, 523 
*CREATABLES, 524 *SUBQUERY_PREDICATES, 525 *TYPE_TOKENS, 526 *NO_PAREN_FUNCTIONS, 527 } 528 ID_VAR_TOKENS.remove(TokenType.UNION) 529 530 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 531 532 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 533 TokenType.ANTI, 534 TokenType.APPLY, 535 TokenType.ASOF, 536 TokenType.FULL, 537 TokenType.LEFT, 538 TokenType.LOCK, 539 TokenType.NATURAL, 540 TokenType.OFFSET, 541 TokenType.RIGHT, 542 TokenType.SEMI, 543 TokenType.WINDOW, 544 } 545 546 ALIAS_TOKENS = ID_VAR_TOKENS 547 548 ARRAY_CONSTRUCTORS = { 549 "ARRAY": exp.Array, 550 "LIST": exp.List, 551 } 552 553 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 554 555 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 556 557 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 558 559 FUNC_TOKENS = { 560 TokenType.COLLATE, 561 TokenType.COMMAND, 562 TokenType.CURRENT_DATE, 563 TokenType.CURRENT_DATETIME, 564 TokenType.CURRENT_TIMESTAMP, 565 TokenType.CURRENT_TIME, 566 TokenType.CURRENT_USER, 567 TokenType.FILTER, 568 TokenType.FIRST, 569 TokenType.FORMAT, 570 TokenType.GLOB, 571 TokenType.IDENTIFIER, 572 TokenType.INDEX, 573 TokenType.ISNULL, 574 TokenType.ILIKE, 575 TokenType.INSERT, 576 TokenType.LIKE, 577 TokenType.MERGE, 578 TokenType.OFFSET, 579 TokenType.PRIMARY_KEY, 580 TokenType.RANGE, 581 TokenType.REPLACE, 582 TokenType.RLIKE, 583 TokenType.ROW, 584 TokenType.UNNEST, 585 TokenType.VAR, 586 TokenType.LEFT, 587 TokenType.RIGHT, 588 TokenType.SEQUENCE, 589 TokenType.DATE, 590 TokenType.DATETIME, 591 TokenType.TABLE, 592 TokenType.TIMESTAMP, 593 TokenType.TIMESTAMPTZ, 594 TokenType.TRUNCATE, 595 TokenType.WINDOW, 596 TokenType.XOR, 597 *TYPE_TOKENS, 598 *SUBQUERY_PREDICATES, 599 } 600 601 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 602 TokenType.AND: exp.And, 603 } 604 605 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 606 TokenType.COLON_EQ: exp.PropertyEQ, 607 } 608 609 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 610 
TokenType.OR: exp.Or, 611 } 612 613 EQUALITY = { 614 TokenType.EQ: exp.EQ, 615 TokenType.NEQ: exp.NEQ, 616 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 617 } 618 619 COMPARISON = { 620 TokenType.GT: exp.GT, 621 TokenType.GTE: exp.GTE, 622 TokenType.LT: exp.LT, 623 TokenType.LTE: exp.LTE, 624 } 625 626 BITWISE = { 627 TokenType.AMP: exp.BitwiseAnd, 628 TokenType.CARET: exp.BitwiseXor, 629 TokenType.PIPE: exp.BitwiseOr, 630 } 631 632 TERM = { 633 TokenType.DASH: exp.Sub, 634 TokenType.PLUS: exp.Add, 635 TokenType.MOD: exp.Mod, 636 TokenType.COLLATE: exp.Collate, 637 } 638 639 FACTOR = { 640 TokenType.DIV: exp.IntDiv, 641 TokenType.LR_ARROW: exp.Distance, 642 TokenType.SLASH: exp.Div, 643 TokenType.STAR: exp.Mul, 644 } 645 646 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 647 648 TIMES = { 649 TokenType.TIME, 650 TokenType.TIMETZ, 651 } 652 653 TIMESTAMPS = { 654 TokenType.TIMESTAMP, 655 TokenType.TIMESTAMPTZ, 656 TokenType.TIMESTAMPLTZ, 657 *TIMES, 658 } 659 660 SET_OPERATIONS = { 661 TokenType.UNION, 662 TokenType.INTERSECT, 663 TokenType.EXCEPT, 664 } 665 666 JOIN_METHODS = { 667 TokenType.ASOF, 668 TokenType.NATURAL, 669 TokenType.POSITIONAL, 670 } 671 672 JOIN_SIDES = { 673 TokenType.LEFT, 674 TokenType.RIGHT, 675 TokenType.FULL, 676 } 677 678 JOIN_KINDS = { 679 TokenType.ANTI, 680 TokenType.CROSS, 681 TokenType.INNER, 682 TokenType.OUTER, 683 TokenType.SEMI, 684 TokenType.STRAIGHT_JOIN, 685 } 686 687 JOIN_HINTS: t.Set[str] = set() 688 689 LAMBDAS = { 690 TokenType.ARROW: lambda self, expressions: self.expression( 691 exp.Lambda, 692 this=self._replace_lambda( 693 self._parse_assignment(), 694 expressions, 695 ), 696 expressions=expressions, 697 ), 698 TokenType.FARROW: lambda self, expressions: self.expression( 699 exp.Kwarg, 700 this=exp.var(expressions[0].name), 701 expression=self._parse_assignment(), 702 ), 703 } 704 705 COLUMN_OPERATORS = { 706 TokenType.DOT: None, 707 TokenType.DCOLON: lambda self, this, to: self.expression( 708 exp.Cast if 
self.STRICT_CAST else exp.TryCast, 709 this=this, 710 to=to, 711 ), 712 TokenType.ARROW: lambda self, this, path: self.expression( 713 exp.JSONExtract, 714 this=this, 715 expression=self.dialect.to_json_path(path), 716 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 717 ), 718 TokenType.DARROW: lambda self, this, path: self.expression( 719 exp.JSONExtractScalar, 720 this=this, 721 expression=self.dialect.to_json_path(path), 722 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 723 ), 724 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 725 exp.JSONBExtract, 726 this=this, 727 expression=path, 728 ), 729 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 730 exp.JSONBExtractScalar, 731 this=this, 732 expression=path, 733 ), 734 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 735 exp.JSONBContains, 736 this=this, 737 expression=key, 738 ), 739 } 740 741 EXPRESSION_PARSERS = { 742 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 743 exp.Column: lambda self: self._parse_column(), 744 exp.Condition: lambda self: self._parse_assignment(), 745 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 746 exp.Expression: lambda self: self._parse_expression(), 747 exp.From: lambda self: self._parse_from(joins=True), 748 exp.Group: lambda self: self._parse_group(), 749 exp.Having: lambda self: self._parse_having(), 750 exp.Identifier: lambda self: self._parse_id_var(), 751 exp.Join: lambda self: self._parse_join(), 752 exp.Lambda: lambda self: self._parse_lambda(), 753 exp.Lateral: lambda self: self._parse_lateral(), 754 exp.Limit: lambda self: self._parse_limit(), 755 exp.Offset: lambda self: self._parse_offset(), 756 exp.Order: lambda self: self._parse_order(), 757 exp.Ordered: lambda self: self._parse_ordered(), 758 exp.Properties: lambda self: self._parse_properties(), 759 exp.Qualify: lambda self: self._parse_qualify(), 760 exp.Returning: lambda self: 
self._parse_returning(), 761 exp.Select: lambda self: self._parse_select(), 762 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 763 exp.Table: lambda self: self._parse_table_parts(), 764 exp.TableAlias: lambda self: self._parse_table_alias(), 765 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 766 exp.Where: lambda self: self._parse_where(), 767 exp.Window: lambda self: self._parse_named_window(), 768 exp.With: lambda self: self._parse_with(), 769 "JOIN_TYPE": lambda self: self._parse_join_parts(), 770 } 771 772 STATEMENT_PARSERS = { 773 TokenType.ALTER: lambda self: self._parse_alter(), 774 TokenType.BEGIN: lambda self: self._parse_transaction(), 775 TokenType.CACHE: lambda self: self._parse_cache(), 776 TokenType.COMMENT: lambda self: self._parse_comment(), 777 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 778 TokenType.COPY: lambda self: self._parse_copy(), 779 TokenType.CREATE: lambda self: self._parse_create(), 780 TokenType.DELETE: lambda self: self._parse_delete(), 781 TokenType.DESC: lambda self: self._parse_describe(), 782 TokenType.DESCRIBE: lambda self: self._parse_describe(), 783 TokenType.DROP: lambda self: self._parse_drop(), 784 TokenType.GRANT: lambda self: self._parse_grant(), 785 TokenType.INSERT: lambda self: self._parse_insert(), 786 TokenType.KILL: lambda self: self._parse_kill(), 787 TokenType.LOAD: lambda self: self._parse_load(), 788 TokenType.MERGE: lambda self: self._parse_merge(), 789 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 790 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 791 TokenType.REFRESH: lambda self: self._parse_refresh(), 792 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 793 TokenType.SET: lambda self: self._parse_set(), 794 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 795 TokenType.UNCACHE: lambda self: self._parse_uncache(), 796 TokenType.UPDATE: lambda self: 
self._parse_update(), 797 TokenType.USE: lambda self: self.expression( 798 exp.Use, 799 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 800 this=self._parse_table(schema=False), 801 ), 802 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 803 } 804 805 UNARY_PARSERS = { 806 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 807 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 808 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 809 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 810 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 811 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 812 } 813 814 STRING_PARSERS = { 815 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 816 exp.RawString, this=token.text 817 ), 818 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 819 exp.National, this=token.text 820 ), 821 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 822 TokenType.STRING: lambda self, token: self.expression( 823 exp.Literal, this=token.text, is_string=True 824 ), 825 TokenType.UNICODE_STRING: lambda self, token: self.expression( 826 exp.UnicodeString, 827 this=token.text, 828 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 829 ), 830 } 831 832 NUMERIC_PARSERS = { 833 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 834 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 835 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 836 TokenType.NUMBER: lambda self, token: self.expression( 837 exp.Literal, this=token.text, is_string=False 838 ), 839 } 840 841 PRIMARY_PARSERS = { 842 **STRING_PARSERS, 843 
**NUMERIC_PARSERS, 844 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 845 TokenType.NULL: lambda self, _: self.expression(exp.Null), 846 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 847 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 848 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 849 TokenType.STAR: lambda self, _: self._parse_star_ops(), 850 } 851 852 PLACEHOLDER_PARSERS = { 853 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 854 TokenType.PARAMETER: lambda self: self._parse_parameter(), 855 TokenType.COLON: lambda self: ( 856 self.expression(exp.Placeholder, this=self._prev.text) 857 if self._match_set(self.ID_VAR_TOKENS) 858 else None 859 ), 860 } 861 862 RANGE_PARSERS = { 863 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 864 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 865 TokenType.GLOB: binary_range_parser(exp.Glob), 866 TokenType.ILIKE: binary_range_parser(exp.ILike), 867 TokenType.IN: lambda self, this: self._parse_in(this), 868 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 869 TokenType.IS: lambda self, this: self._parse_is(this), 870 TokenType.LIKE: binary_range_parser(exp.Like), 871 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 872 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 873 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 874 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 875 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 876 } 877 878 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 879 "ALLOWED_VALUES": lambda self: self.expression( 880 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 881 ), 882 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 883 "AUTO": lambda self: self._parse_auto_property(), 884 "AUTO_INCREMENT": lambda self: 
self._parse_property_assignment(exp.AutoIncrementProperty), 885 "BACKUP": lambda self: self.expression( 886 exp.BackupProperty, this=self._parse_var(any_token=True) 887 ), 888 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 889 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 890 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 891 "CHECKSUM": lambda self: self._parse_checksum(), 892 "CLUSTER BY": lambda self: self._parse_cluster(), 893 "CLUSTERED": lambda self: self._parse_clustered_by(), 894 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 895 exp.CollateProperty, **kwargs 896 ), 897 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 898 "CONTAINS": lambda self: self._parse_contains_property(), 899 "COPY": lambda self: self._parse_copy_property(), 900 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 901 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 902 "DEFINER": lambda self: self._parse_definer(), 903 "DETERMINISTIC": lambda self: self.expression( 904 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 905 ), 906 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 907 "DUPLICATE": lambda self: self._parse_duplicate(), 908 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 909 "DISTKEY": lambda self: self._parse_distkey(), 910 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 911 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 912 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 913 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 914 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 915 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 916 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 917 "FREESPACE": 
lambda self: self._parse_freespace(), 918 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 919 "HEAP": lambda self: self.expression(exp.HeapProperty), 920 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 921 "IMMUTABLE": lambda self: self.expression( 922 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 923 ), 924 "INHERITS": lambda self: self.expression( 925 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 926 ), 927 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 928 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 929 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 930 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 931 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 932 "LIKE": lambda self: self._parse_create_like(), 933 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 934 "LOCK": lambda self: self._parse_locking(), 935 "LOCKING": lambda self: self._parse_locking(), 936 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 937 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 938 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 939 "MODIFIES": lambda self: self._parse_modifies_property(), 940 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 941 "NO": lambda self: self._parse_no_property(), 942 "ON": lambda self: self._parse_on_property(), 943 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 944 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 945 "PARTITION": lambda self: self._parse_partitioned_of(), 946 "PARTITION BY": lambda self: self._parse_partitioned_by(), 947 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 948 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 949 "PRIMARY KEY": 
lambda self: self._parse_primary_key(in_props=True), 950 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 951 "READS": lambda self: self._parse_reads_property(), 952 "REMOTE": lambda self: self._parse_remote_with_connection(), 953 "RETURNS": lambda self: self._parse_returns(), 954 "STRICT": lambda self: self.expression(exp.StrictProperty), 955 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 956 "ROW": lambda self: self._parse_row(), 957 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 958 "SAMPLE": lambda self: self.expression( 959 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 960 ), 961 "SECURE": lambda self: self.expression(exp.SecureProperty), 962 "SECURITY": lambda self: self._parse_security(), 963 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 964 "SETTINGS": lambda self: self._parse_settings_property(), 965 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 966 "SORTKEY": lambda self: self._parse_sortkey(), 967 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 968 "STABLE": lambda self: self.expression( 969 exp.StabilityProperty, this=exp.Literal.string("STABLE") 970 ), 971 "STORED": lambda self: self._parse_stored(), 972 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 973 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 974 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 975 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 976 "TO": lambda self: self._parse_to_table(), 977 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 978 "TRANSFORM": lambda self: self.expression( 979 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 980 ), 981 "TTL": lambda self: self._parse_ttl(), 982 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 983 "UNLOGGED": lambda 
self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Keyword -> parser for column/table constraint clauses (NOT NULL, DEFAULT, CHECK, ...).
    # Each parser is invoked after its keyword has been matched and returns the
    # corresponding constraint expression.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <func> is a constraint; a bare ON <id> is parsed as a property
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # Keyword -> parser for the action clause that follows ALTER TABLE <name>.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }
1068 ALTER_ALTER_PARSERS = { 1069 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1070 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1071 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1072 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1073 } 1074 1075 SCHEMA_UNNAMED_CONSTRAINTS = { 1076 "CHECK", 1077 "EXCLUDE", 1078 "FOREIGN KEY", 1079 "LIKE", 1080 "PERIOD", 1081 "PRIMARY KEY", 1082 "UNIQUE", 1083 } 1084 1085 NO_PAREN_FUNCTION_PARSERS = { 1086 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1087 "CASE": lambda self: self._parse_case(), 1088 "CONNECT_BY_ROOT": lambda self: self.expression( 1089 exp.ConnectByRoot, this=self._parse_column() 1090 ), 1091 "IF": lambda self: self._parse_if(), 1092 "NEXT": lambda self: self._parse_next_value_for(), 1093 } 1094 1095 INVALID_FUNC_NAME_TOKENS = { 1096 TokenType.IDENTIFIER, 1097 TokenType.STRING, 1098 } 1099 1100 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1101 1102 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1103 1104 FUNCTION_PARSERS = { 1105 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1106 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1107 "DECODE": lambda self: self._parse_decode(), 1108 "EXTRACT": lambda self: self._parse_extract(), 1109 "GAP_FILL": lambda self: self._parse_gap_fill(), 1110 "JSON_OBJECT": lambda self: self._parse_json_object(), 1111 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1112 "JSON_TABLE": lambda self: self._parse_json_table(), 1113 "MATCH": lambda self: self._parse_match_against(), 1114 "NORMALIZE": lambda self: self._parse_normalize(), 1115 "OPENJSON": lambda self: self._parse_open_json(), 1116 "OVERLAY": lambda self: self._parse_overlay(), 1117 "POSITION": lambda self: self._parse_position(), 1118 "PREDICT": lambda self: self._parse_predict(), 1119 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1120 "STRING_AGG": lambda self: 
self._parse_string_agg(), 1121 "SUBSTRING": lambda self: self._parse_substring(), 1122 "TRIM": lambda self: self._parse_trim(), 1123 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1124 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1125 } 1126 1127 QUERY_MODIFIER_PARSERS = { 1128 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1129 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1130 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1131 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1132 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1133 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1134 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1135 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1136 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1137 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1138 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1139 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1140 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1141 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1142 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1143 TokenType.CLUSTER_BY: lambda self: ( 1144 "cluster", 1145 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1146 ), 1147 TokenType.DISTRIBUTE_BY: lambda self: ( 1148 "distribute", 1149 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1150 ), 1151 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1152 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1153 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1154 } 1155 1156 SET_PARSERS = { 1157 "GLOBAL": 
lambda self: self._parse_set_item_assignment("GLOBAL"), 1158 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1159 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1160 "TRANSACTION": lambda self: self._parse_set_transaction(), 1161 } 1162 1163 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1164 1165 TYPE_LITERAL_PARSERS = { 1166 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1167 } 1168 1169 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1170 1171 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1172 1173 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1174 1175 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1176 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1177 "ISOLATION": ( 1178 ("LEVEL", "REPEATABLE", "READ"), 1179 ("LEVEL", "READ", "COMMITTED"), 1180 ("LEVEL", "READ", "UNCOMITTED"), 1181 ("LEVEL", "SERIALIZABLE"), 1182 ), 1183 "READ": ("WRITE", "ONLY"), 1184 } 1185 1186 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1187 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1188 ) 1189 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1190 1191 CREATE_SEQUENCE: OPTIONS_TYPE = { 1192 "SCALE": ("EXTEND", "NOEXTEND"), 1193 "SHARD": ("EXTEND", "NOEXTEND"), 1194 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1195 **dict.fromkeys( 1196 ( 1197 "SESSION", 1198 "GLOBAL", 1199 "KEEP", 1200 "NOKEEP", 1201 "ORDER", 1202 "NOORDER", 1203 "NOCACHE", 1204 "CYCLE", 1205 "NOCYCLE", 1206 "NOMINVALUE", 1207 "NOMAXVALUE", 1208 "NOSCALE", 1209 "NOSHARD", 1210 ), 1211 tuple(), 1212 ), 1213 } 1214 1215 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1216 1217 USABLES: OPTIONS_TYPE = dict.fromkeys( 1218 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1219 ) 1220 1221 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1222 
    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    # Options that may follow a key constraint, e.g. NOT ENFORCED, MATCH FULL.
    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # ODBC escape prefixes, e.g. {d '2024-01-01'} -> exp.Date
    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Imported here to avoid a circular import with sqlglot.dialects
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Resets all mutable parsing state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate type failed; surface all collected errors at once
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and applies `parse_method` to each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A semicolon carrying comments gets its own chunk so they aren't lost
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk wasn't fully consumed by the parser
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfers the pending comments (if any) onto the given expression
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL covered by the two tokens, inclusive
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True if there is no whitespace between the previous and current tokens
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        """Moves the cursor forward `times` tokens and refreshes the lookahead state."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or fast-forwards) the cursor to an absolute token index
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """Wraps the remainder of the statement in an opaque Command node."""
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT ON <kind> <target> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse TTL clause, including its DELETE/RECOMPRESS/TO DISK/TO VOLUME actions."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level entry point: dispatches on the current token to a statement parser."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        # Not a known statement keyword: try an expression, then fall back to SELECT
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to a Command if the kind isn't creatable."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only when fully matched
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement for any creatable kind (table, view, function, index, ...)."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at the various syntactic locations
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parses CREATE SEQUENCE options; returns None if no option was consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a property that appears before the schema in CREATE.

        # only used for teradata currently
        """
        self._match(TokenType.COMMA)

        # Collect optional modifier keywords preceding the property name
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier flags that actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The targeted parser doesn't accept one of the matched modifiers
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of properties."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property clause, dispatching to PROPERTY_PARSERS when possible,
        and falling back to a generic ``key = value`` property."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key=value property; rewind and try sequence options instead
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse STORED AS <format> or STORED AS INPUTFORMAT ... OUTPUTFORMAT ... (Hive)."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        """Parse a field, demoting unquoted identifiers to plain variables."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse an optional ``=`` / ``AS`` followed by a value into `exp_class`."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into an exp.Properties node.

        Args:
            before: if True, use the pre-schema property grammar (Teradata).

        Returns None when no properties were found.
        """
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse may yield one property or a list of them
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parse [NO] FALLBACK [PROTECTION] (Teradata)."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        """Parse SECURITY {DEFINER | INVOKER}."""
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        """Parse SETTINGS key = value, ... into a SettingsProperty."""
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property when preceded by a PRE_VOLATILE
        token, otherwise a function stability specifier."""
        if self._index >= 2:
            # Look two tokens back to see what preceded VOLATILE
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL SYSTEM_VERSIONING = {OFF | ON [(<options>)]}."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse DATA_DELETION = {ON | OFF} [(FILTER_COLUMN=..., RETENTION_PERIOD=...)]."""
        self._match(TokenType.EQ)
        # ON unless OFF is explicitly given
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        """Parse DISTRIBUTED BY {HASH(...) | RANDOM} [BUCKETS n | BUCKETS AUTO] [ORDER BY ...]."""
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_duplicate(self) -> exp.DuplicateKeyProperty:
        """Parse DUPLICATE KEY (<id_vars>)."""
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False)
        return self.expression(exp.DuplicateKeyProperty, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the many dialect-specific WITH <property> forms."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        """Parse a single stored-procedure option (e.g. EXECUTE AS ...)."""
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse DEFINER = user@host (MySQL)."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
        # Tail of _parse_definer: both parts are required for a valid DEFINER
        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse WITH JOURNAL TABLE = <table> (Teradata)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parse [NO] LOG."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Build a JournalProperty from previously-matched modifier flags."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = {ON | OFF | DEFAULT}."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse CLUSTER BY expressions, optionally parenthesized."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (...) [SORTED BY (...)] INTO <n> BUCKETS (Hive)."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; backtrack over COPY if GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE = <number> [PERCENT] (Teradata)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse [NO | DEFAULT] MERGEBLOCKRATIO [= <number> [PERCENT]] (Teradata)."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [= <size>] [BYTES | KBYTES | KILOBYTES] (Teradata)."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING [...]; backtrack if absent."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse Teradata's LOCKING clause: kind, target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named targets (not ROW) carry a table reference
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse PARTITION BY <exprs>, returning [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound spec: IN (...), FROM ... TO ..., or WITH (MODULUS, REMAINDER)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE / MAXVALUE are keywords here, not identifiers
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <table> {DEFAULT | FOR VALUES <bound spec>} (Postgres)."""
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY <schema or bracketed expression>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse CONTAINS SQL."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse MODIFIES SQL DATA."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse properties prefixed by NO (NO PRIMARY INDEX, NO SQL)."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse properties prefixed by ON (ON COMMIT ... ROWS, or a generic ON target)."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse READS SQL DATA."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY(<id_var>) (Redshift)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [{INCLUDING | EXCLUDING} <option> ...]."""
        table = self._parse_table(schema=True)
        # Tail of _parse_create_like: collect INCLUDING/EXCLUDING options
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY(<id_vars>) (Redshift)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <charset>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <connection>."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse RETURNS {<type> | TABLE [<schema>] | NULL ON NULL INPUT}."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<...> form with an angle-bracketed struct schema
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<kind>] [<style>] <table> [properties] [partition]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The "style" token was actually the first part of a dotted table name
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse Oracle-style INSERT {ALL | FIRST} [WHEN ... THEN] INTO ... SELECT."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # Each branch is optionally guarded by WHEN <cond> THEN, or is the ELSE branch
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement (including INSERT OVERWRITE DIRECTORY and
        multi-table inserts)."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                # Oracle's INSERT ALL / INSERT FIRST multi-table form
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear before or after the source expression
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ... upsert clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT ... (ROW token already consumed by the caller)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse [WITH] SERDEPROPERTIES (...); backtrack if absent."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT {SERDE <name> | DELIMITED <terminator options>} (Hive)."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA ... INTO TABLE (Hive); otherwise fall back to a raw command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table> (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>] (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            # Tail of _parse_cache's OPTIONS branch
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<assignments>)."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse a single VALUES row as an exp.Tuple."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a query: SELECT, (sub)query, VALUES, SUMMARIZE, DESCRIBE, or STREAM,
        with an optional leading WITH (CTE) clause."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following
                if table and isinstance(this, exp.Values) and this.alias:
                    alias = this.args["alias"].pop()
                    this = exp.Table(this=this, alias=alias)

                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self.expression(exp.Stream, this=self._parse_function())
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH [RECURSIVE] clause with one or more CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            expressions.append(self._parse_cte())
            if last_comments:
                expressions[-1].add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    # NOTE(review): the next definition is truncated in this chunk
    def
_parse_cte(self) -> exp.CTE: 3084 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3085 if not alias or not alias.this: 3086 self.raise_error("Expected CTE to have alias") 3087 3088 self._match(TokenType.ALIAS) 3089 comments = self._prev_comments 3090 3091 if self._match_text_seq("NOT", "MATERIALIZED"): 3092 materialized = False 3093 elif self._match_text_seq("MATERIALIZED"): 3094 materialized = True 3095 else: 3096 materialized = None 3097 3098 return self.expression( 3099 exp.CTE, 3100 this=self._parse_wrapped(self._parse_statement), 3101 alias=alias, 3102 materialized=materialized, 3103 comments=comments, 3104 ) 3105 3106 def _parse_table_alias( 3107 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3108 ) -> t.Optional[exp.TableAlias]: 3109 any_token = self._match(TokenType.ALIAS) 3110 alias = ( 3111 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3112 or self._parse_string_as_identifier() 3113 ) 3114 3115 index = self._index 3116 if self._match(TokenType.L_PAREN): 3117 columns = self._parse_csv(self._parse_function_parameter) 3118 self._match_r_paren() if columns else self._retreat(index) 3119 else: 3120 columns = None 3121 3122 if not alias and not columns: 3123 return None 3124 3125 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3126 3127 # We bubble up comments from the Identifier to the TableAlias 3128 if isinstance(alias, exp.Identifier): 3129 table_alias.add_comments(alias.pop_comments()) 3130 3131 return table_alias 3132 3133 def _parse_subquery( 3134 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3135 ) -> t.Optional[exp.Subquery]: 3136 if not this: 3137 return None 3138 3139 return self.expression( 3140 exp.Subquery, 3141 this=this, 3142 pivots=self._parse_pivots(), 3143 alias=self._parse_table_alias() if parse_alias else None, 3144 sample=self._parse_table_sample(), 3145 ) 3146 3147 def _implicit_unnests_to_explicit(self, this: E) -> E: 3148 from 
sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3149 3150 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3151 for i, join in enumerate(this.args.get("joins") or []): 3152 table = join.this 3153 normalized_table = table.copy() 3154 normalized_table.meta["maybe_column"] = True 3155 normalized_table = _norm(normalized_table, dialect=self.dialect) 3156 3157 if isinstance(table, exp.Table) and not join.args.get("on"): 3158 if normalized_table.parts[0].name in refs: 3159 table_as_column = table.to_column() 3160 unnest = exp.Unnest(expressions=[table_as_column]) 3161 3162 # Table.to_column creates a parent Alias node that we want to convert to 3163 # a TableAlias and attach to the Unnest, so it matches the parser's output 3164 if isinstance(table.args.get("alias"), exp.TableAlias): 3165 table_as_column.replace(table_as_column.this) 3166 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3167 3168 table.replace(unnest) 3169 3170 refs.add(normalized_table.alias_or_name) 3171 3172 return this 3173 3174 def _parse_query_modifiers( 3175 self, this: t.Optional[exp.Expression] 3176 ) -> t.Optional[exp.Expression]: 3177 if isinstance(this, (exp.Query, exp.Table)): 3178 for join in self._parse_joins(): 3179 this.append("joins", join) 3180 for lateral in iter(self._parse_lateral, None): 3181 this.append("laterals", lateral) 3182 3183 while True: 3184 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3185 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3186 key, expression = parser(self) 3187 3188 if expression: 3189 this.set(key, expression) 3190 if key == "limit": 3191 offset = expression.args.pop("offset", None) 3192 3193 if offset: 3194 offset = exp.Offset(expression=offset) 3195 this.set("offset", offset) 3196 3197 limit_by_expressions = expression.expressions 3198 expression.set("expressions", None) 3199 offset.set("expressions", limit_by_expressions) 3200 continue 
            break

        # Implicit-UNNEST dialects rewrite qualifying joins into explicit
        # UNNEST calls; this needs a FROM clause to resolve references against.
        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment block into an `exp.Hint`.

        Returns None when the current token is not a HINT token. Raises via
        `raise_error` if the closing `*/` is missing.
        """
        if self._match(TokenType.HINT):
            hints = []
            # Hint entries may be whitespace-separated, so keep consuming CSV
            # groups until an empty group (the `iter` sentinel) is produced.
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            # The closing `*/` arrives as separate STAR and SLASH tokens
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse a `SELECT ... INTO [TEMPORARY] [UNLOGGED] [TABLE] <name>` target."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)  # the TABLE keyword is optional

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause into an `exp.From`.

        Args:
            joins: whether trailing joins should be consumed into the table.
            skip_from_token: set when the caller already consumed the FROM keyword.

        Returns None when no FROM keyword is present (and not skipped).
        """
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        # A measure is an expression with an optional FINAL / RUNNING prefix
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) row-pattern-matching clause.

        Returns None when the current token is not MATCH_RECOGNIZE.
        """
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
3267 else None 3268 ) 3269 3270 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3271 rows = exp.var("ONE ROW PER MATCH") 3272 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3273 text = "ALL ROWS PER MATCH" 3274 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3275 text += " SHOW EMPTY MATCHES" 3276 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3277 text += " OMIT EMPTY MATCHES" 3278 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3279 text += " WITH UNMATCHED ROWS" 3280 rows = exp.var(text) 3281 else: 3282 rows = None 3283 3284 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3285 text = "AFTER MATCH SKIP" 3286 if self._match_text_seq("PAST", "LAST", "ROW"): 3287 text += " PAST LAST ROW" 3288 elif self._match_text_seq("TO", "NEXT", "ROW"): 3289 text += " TO NEXT ROW" 3290 elif self._match_text_seq("TO", "FIRST"): 3291 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3292 elif self._match_text_seq("TO", "LAST"): 3293 text += f" TO LAST {self._advance_any().text}" # type: ignore 3294 after = exp.var(text) 3295 else: 3296 after = None 3297 3298 if self._match_text_seq("PATTERN"): 3299 self._match_l_paren() 3300 3301 if not self._curr: 3302 self.raise_error("Expecting )", self._curr) 3303 3304 paren = 1 3305 start = self._curr 3306 3307 while self._curr and paren > 0: 3308 if self._curr.token_type == TokenType.L_PAREN: 3309 paren += 1 3310 if self._curr.token_type == TokenType.R_PAREN: 3311 paren -= 1 3312 3313 end = self._prev 3314 self._advance() 3315 3316 if paren > 0: 3317 self.raise_error("Expecting )", self._curr) 3318 3319 pattern = exp.var(self._find_sql(start, end)) 3320 else: 3321 pattern = None 3322 3323 define = ( 3324 self._parse_csv(self._parse_name_as_expression) 3325 if self._match_text_seq("DEFINE") 3326 else None 3327 ) 3328 3329 self._match_r_paren() 3330 3331 return self.expression( 3332 exp.MatchRecognize, 3333 partition_by=partition, 3334 order=order, 3335 measures=measures, 3336 rows=rows, 
3337 after=after, 3338 pattern=pattern, 3339 define=define, 3340 alias=self._parse_table_alias(), 3341 ) 3342 3343 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3344 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3345 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3346 cross_apply = False 3347 3348 if cross_apply is not None: 3349 this = self._parse_select(table=True) 3350 view = None 3351 outer = None 3352 elif self._match(TokenType.LATERAL): 3353 this = self._parse_select(table=True) 3354 view = self._match(TokenType.VIEW) 3355 outer = self._match(TokenType.OUTER) 3356 else: 3357 return None 3358 3359 if not this: 3360 this = ( 3361 self._parse_unnest() 3362 or self._parse_function() 3363 or self._parse_id_var(any_token=False) 3364 ) 3365 3366 while self._match(TokenType.DOT): 3367 this = exp.Dot( 3368 this=this, 3369 expression=self._parse_function() or self._parse_id_var(any_token=False), 3370 ) 3371 3372 if view: 3373 table = self._parse_id_var(any_token=False) 3374 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3375 table_alias: t.Optional[exp.TableAlias] = self.expression( 3376 exp.TableAlias, this=table, columns=columns 3377 ) 3378 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3379 # We move the alias from the lateral's child node to the lateral itself 3380 table_alias = this.args["alias"].pop() 3381 else: 3382 table_alias = self._parse_table_alias() 3383 3384 return self.expression( 3385 exp.Lateral, 3386 this=this, 3387 view=view, 3388 outer=outer, 3389 alias=table_alias, 3390 cross_apply=cross_apply, 3391 ) 3392 3393 def _parse_join_parts( 3394 self, 3395 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3396 return ( 3397 self._match_set(self.JOIN_METHODS) and self._prev, 3398 self._match_set(self.JOIN_SIDES) and self._prev, 3399 self._match_set(self.JOIN_KINDS) and self._prev, 3400 ) 3401 3402 def 
    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the column list of a JOIN ... USING (...) clause.

        Plain columns are unwrapped to bare identifiers so `USING (a, b)`
        yields Identifier nodes rather than Column nodes.
        """
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single join clause, returning None when no join follows.

        Args:
            skip_join_token: set when the caller already consumed the JOIN keyword.
            parse_bracket: whether the joined table may be a bracket expression.
        """
        # A bare comma between tables is an implicit join
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        # STRAIGHT_JOIN is a join kind that also acts as the JOIN keyword itself
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            # Not actually a join: roll back any method/side/kind tokens consumed
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        # An ARRAY join may list multiple comma-separated tables
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        # MATCH_CONDITION (<comparison>) — presumably ASOF-join syntax; verify dialect
        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
elif ( 3457 not (outer_apply or cross_apply) 3458 and not isinstance(kwargs["this"], exp.Unnest) 3459 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3460 ): 3461 index = self._index 3462 joins: t.Optional[list] = list(self._parse_joins()) 3463 3464 if joins and self._match(TokenType.ON): 3465 kwargs["on"] = self._parse_assignment() 3466 elif joins and self._match(TokenType.USING): 3467 kwargs["using"] = self._parse_using_identifiers() 3468 else: 3469 joins = None 3470 self._retreat(index) 3471 3472 kwargs["this"].set("joins", joins if joins else None) 3473 3474 comments = [c for token in (method, side, kind) if token for c in token.comments] 3475 return self.expression(exp.Join, comments=comments, **kwargs) 3476 3477 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3478 this = self._parse_assignment() 3479 3480 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3481 return this 3482 3483 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3484 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3485 3486 return this 3487 3488 def _parse_index_params(self) -> exp.IndexParameters: 3489 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3490 3491 if self._match(TokenType.L_PAREN, advance=False): 3492 columns = self._parse_wrapped_csv(self._parse_with_operator) 3493 else: 3494 columns = None 3495 3496 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3497 partition_by = self._parse_partition_by() 3498 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3499 tablespace = ( 3500 self._parse_var(any_token=True) 3501 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3502 else None 3503 ) 3504 where = self._parse_where() 3505 3506 on = self._parse_field() if self._match(TokenType.ON) else None 3507 3508 return self.expression( 3509 exp.IndexParameters, 3510 using=using, 3511 
columns=columns, 3512 include=include, 3513 partition_by=partition_by, 3514 where=where, 3515 with_storage=with_storage, 3516 tablespace=tablespace, 3517 on=on, 3518 ) 3519 3520 def _parse_index( 3521 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3522 ) -> t.Optional[exp.Index]: 3523 if index or anonymous: 3524 unique = None 3525 primary = None 3526 amp = None 3527 3528 self._match(TokenType.ON) 3529 self._match(TokenType.TABLE) # hive 3530 table = self._parse_table_parts(schema=True) 3531 else: 3532 unique = self._match(TokenType.UNIQUE) 3533 primary = self._match_text_seq("PRIMARY") 3534 amp = self._match_text_seq("AMP") 3535 3536 if not self._match(TokenType.INDEX): 3537 return None 3538 3539 index = self._parse_id_var() 3540 table = None 3541 3542 params = self._parse_index_params() 3543 3544 return self.expression( 3545 exp.Index, 3546 this=index, 3547 table=table, 3548 unique=unique, 3549 primary=primary, 3550 amp=amp, 3551 params=params, 3552 ) 3553 3554 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3555 hints: t.List[exp.Expression] = [] 3556 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3557 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3558 hints.append( 3559 self.expression( 3560 exp.WithTableHint, 3561 expressions=self._parse_csv( 3562 lambda: self._parse_function() or self._parse_var(any_token=True) 3563 ), 3564 ) 3565 ) 3566 self._match_r_paren() 3567 else: 3568 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3569 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3570 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3571 3572 self._match_set((TokenType.INDEX, TokenType.KEY)) 3573 if self._match(TokenType.FOR): 3574 hint.set("target", self._advance_any() and self._prev.text.upper()) 3575 3576 hint.set("expressions", self._parse_wrapped_id_vars()) 3577 hints.append(hint) 3578 3579 return hints or None 3580 3581 def 
_parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3582 return ( 3583 (not schema and self._parse_function(optional_parens=False)) 3584 or self._parse_id_var(any_token=False) 3585 or self._parse_string_as_identifier() 3586 or self._parse_placeholder() 3587 ) 3588 3589 def _parse_table_parts( 3590 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3591 ) -> exp.Table: 3592 catalog = None 3593 db = None 3594 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3595 3596 while self._match(TokenType.DOT): 3597 if catalog: 3598 # This allows nesting the table in arbitrarily many dot expressions if needed 3599 table = self.expression( 3600 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3601 ) 3602 else: 3603 catalog = db 3604 db = table 3605 # "" used for tsql FROM a..b case 3606 table = self._parse_table_part(schema=schema) or "" 3607 3608 if ( 3609 wildcard 3610 and self._is_connected() 3611 and (isinstance(table, exp.Identifier) or not table) 3612 and self._match(TokenType.STAR) 3613 ): 3614 if isinstance(table, exp.Identifier): 3615 table.args["this"] += "*" 3616 else: 3617 table = exp.Identifier(this="*") 3618 3619 # We bubble up comments from the Identifier to the Table 3620 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3621 3622 if is_db_reference: 3623 catalog = db 3624 db = table 3625 table = None 3626 3627 if not table and not is_db_reference: 3628 self.raise_error(f"Expected table name but got {self._curr}") 3629 if not db and is_db_reference: 3630 self.raise_error(f"Expected database name but got {self._curr}") 3631 3632 table = self.expression( 3633 exp.Table, 3634 comments=comments, 3635 this=table, 3636 db=db, 3637 catalog=catalog, 3638 ) 3639 3640 changes = self._parse_changes() 3641 if changes: 3642 table.set("changes", changes) 3643 3644 at_before = self._parse_historical_data() 3645 if at_before: 3646 
table.set("when", at_before) 3647 3648 pivots = self._parse_pivots() 3649 if pivots: 3650 table.set("pivots", pivots) 3651 3652 return table 3653 3654 def _parse_table( 3655 self, 3656 schema: bool = False, 3657 joins: bool = False, 3658 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3659 parse_bracket: bool = False, 3660 is_db_reference: bool = False, 3661 parse_partition: bool = False, 3662 ) -> t.Optional[exp.Expression]: 3663 lateral = self._parse_lateral() 3664 if lateral: 3665 return lateral 3666 3667 unnest = self._parse_unnest() 3668 if unnest: 3669 return unnest 3670 3671 values = self._parse_derived_table_values() 3672 if values: 3673 return values 3674 3675 subquery = self._parse_select(table=True) 3676 if subquery: 3677 if not subquery.args.get("pivots"): 3678 subquery.set("pivots", self._parse_pivots()) 3679 return subquery 3680 3681 bracket = parse_bracket and self._parse_bracket(None) 3682 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3683 3684 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3685 self._parse_table 3686 ) 3687 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3688 3689 only = self._match(TokenType.ONLY) 3690 3691 this = t.cast( 3692 exp.Expression, 3693 bracket 3694 or rows_from 3695 or self._parse_bracket( 3696 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3697 ), 3698 ) 3699 3700 if only: 3701 this.set("only", only) 3702 3703 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3704 self._match_text_seq("*") 3705 3706 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3707 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3708 this.set("partition", self._parse_partition()) 3709 3710 if schema: 3711 return self._parse_schema(this=this) 3712 3713 version = self._parse_version() 3714 3715 if version: 3716 this.set("version", version) 
3717 3718 if self.dialect.ALIAS_POST_TABLESAMPLE: 3719 this.set("sample", self._parse_table_sample()) 3720 3721 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3722 if alias: 3723 this.set("alias", alias) 3724 3725 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3726 return self.expression( 3727 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3728 ) 3729 3730 this.set("hints", self._parse_table_hints()) 3731 3732 if not this.args.get("pivots"): 3733 this.set("pivots", self._parse_pivots()) 3734 3735 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3736 this.set("sample", self._parse_table_sample()) 3737 3738 if joins: 3739 for join in self._parse_joins(): 3740 this.append("joins", join) 3741 3742 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3743 this.set("ordinality", True) 3744 this.set("alias", self._parse_table_alias()) 3745 3746 return this 3747 3748 def _parse_version(self) -> t.Optional[exp.Version]: 3749 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3750 this = "TIMESTAMP" 3751 elif self._match(TokenType.VERSION_SNAPSHOT): 3752 this = "VERSION" 3753 else: 3754 return None 3755 3756 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3757 kind = self._prev.text.upper() 3758 start = self._parse_bitwise() 3759 self._match_texts(("TO", "AND")) 3760 end = self._parse_bitwise() 3761 expression: t.Optional[exp.Expression] = self.expression( 3762 exp.Tuple, expressions=[start, end] 3763 ) 3764 elif self._match_text_seq("CONTAINED", "IN"): 3765 kind = "CONTAINED IN" 3766 expression = self.expression( 3767 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3768 ) 3769 elif self._match(TokenType.ALL): 3770 kind = "ALL" 3771 expression = None 3772 else: 3773 self._match_text_seq("AS", "OF") 3774 kind = "AS OF" 3775 expression = self._parse_type() 3776 3777 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3778 3779 def 
    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        """Parse a time-travel AT/BEFORE clause, e.g. AT(TIMESTAMP => ...).

        Backtracks and returns None if the construct turns out not to be a
        complete AT/BEFORE expression.
        """
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            # FARROW is the `=>` token separating the kind from its value
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                # Incomplete construct: rewind to where we started
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse a CHANGES (INFORMATION => ...) clause with optional AT/BEFORE bounds."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            # CHANGES may be followed by a start bound and an end bound
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) table expression with optional alias and offset."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # In column-only dialects the alias names the produced column,
                # not the derived table, so move it into the columns slot
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            # With ORDINALITY, a surplus trailing column alias names the offset
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset =
self._parse_id_var( 3840 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3841 ) or exp.to_identifier("offset") 3842 3843 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3844 3845 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3846 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3847 if not is_derived and not ( 3848 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3849 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3850 ): 3851 return None 3852 3853 expressions = self._parse_csv(self._parse_value) 3854 alias = self._parse_table_alias() 3855 3856 if is_derived: 3857 self._match_r_paren() 3858 3859 return self.expression( 3860 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3861 ) 3862 3863 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3864 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3865 as_modifier and self._match_text_seq("USING", "SAMPLE") 3866 ): 3867 return None 3868 3869 bucket_numerator = None 3870 bucket_denominator = None 3871 bucket_field = None 3872 percent = None 3873 size = None 3874 seed = None 3875 3876 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3877 matched_l_paren = self._match(TokenType.L_PAREN) 3878 3879 if self.TABLESAMPLE_CSV: 3880 num = None 3881 expressions = self._parse_csv(self._parse_primary) 3882 else: 3883 expressions = None 3884 num = ( 3885 self._parse_factor() 3886 if self._match(TokenType.NUMBER, advance=False) 3887 else self._parse_primary() or self._parse_placeholder() 3888 ) 3889 3890 if self._match_text_seq("BUCKET"): 3891 bucket_numerator = self._parse_number() 3892 self._match_text_seq("OUT", "OF") 3893 bucket_denominator = bucket_denominator = self._parse_number() 3894 self._match(TokenType.ON) 3895 bucket_field = self._parse_field() 3896 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3897 percent = 
            elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
                size = num
            else:
                percent = num

        if matched_l_paren:
            self._match_r_paren()

        # Optional sampling method and/or seed, e.g. TABLESAMPLE BERNOULLI (10) REPEATABLE (42)
        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse zero or more consecutive PIVOT/UNPIVOT clauses; None if there are none."""
        # iter(callable, None) keeps calling _parse_pivot until it returns None
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        """Lazily parse consecutive JOIN clauses until _parse_join returns None."""
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement: PIVOT <table> ON ... USING ... GROUP BY ..."""

        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        """Parse the `FOR <column> IN (...)` portion of a PIVOT clause."""

        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                # An unqualified column used as an alias collapses to its identifier
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            # e.g. Databricks: FOR col IN (ANY ORDER BY ...)
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a single PIVOT/UNPIVOT clause, or return None (retreating) if absent."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # Not actually a PIVOT clause; undo the keyword consumption
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        # Only attach an alias when this pivot isn't immediately followed by another one
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot, combining each
            # IN-value with each aggregation name per the dialect's naming convention
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation; dialects may override this."""
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a ClickHouse-style PREWHERE clause, or None if absent."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, or None if absent (unless the token was pre-consumed)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including ALL/DISTINCT, ROLLUP, CUBE and GROUPING SETS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            # If at most a lone WITH was consumed, it didn't introduce ROLLUP/CUBE/etc.,
            # so give the token back and stop
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            # No progress this iteration -> nothing more to parse
            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        """Parse CUBE(...)/ROLLUP(...); `WITH ROLLUP`-style prefixes take no argument list."""
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one member of GROUPING SETS: either a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, or None if absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, or None if absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY (in either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # PRIOR is only a function-like keyword inside CONNECT BY, so register it
        # temporarily and remove it right after parsing the condition
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        # START WITH may also follow CONNECT BY
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `<name> AS <expr>` (alias first), as used e.g. by INTERPOLATE."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's INTERPOLATE (...) list in WITH FILL, or None if absent."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY / ORDER SIBLINGS BY; returns `this` unchanged when neither is present."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (e.g. SORT BY / CLUSTER BY) introduced by `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term: expression, ASC/DESC, NULLS FIRST/LAST and WITH FILL."""
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        # desc stays falsy (False) when ASC was matched; None when neither keyword appeared
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Without an explicit NULLS directive, infer the dialect's default null ordering
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        # ClickHouse ORDER BY ... WITH FILL [FROM ...] [TO ...] [STEP ...] [INTERPOLATE ...]
        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top=True`) and FETCH FIRST/NEXT clauses."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP may parenthesize an arbitrary term; bare TOP takes a number
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            # MySQL-style LIMIT <offset>, <count>
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        # ANSI FETCH [FIRST|NEXT] <count> [PERCENT] [ROW|ROWS] [ONLY | WITH TIES]
        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged if OFFSET is absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's LIMIT ... BY <exprs> suffix, or a falsy value if absent."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing row-locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>, None = default
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT operands onto `this`, left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments

            if self._match(TokenType.DISTINCT):
                distinct: t.Optional[bool] = True
            elif self._match(TokenType.ALL):
                distinct = False
            else:
                # Fall back to the dialect's per-operation default; None means the
                # dialect requires an explicit DISTINCT/ALL
                distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
                if distinct is None:
                    self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        # Some dialects attach ORDER BY/LIMIT-style modifiers to the whole set operation
        # rather than its last operand, so hoist them off the right-hand side
        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_assignment())
    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse assignment-style operators (e.g. :=) with right-associative nesting."""
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            # A bare single-part column on the left collapses to its identifier
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        """Parse OR-level binary operators."""
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND-level binary operators."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level binary operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level binary operators (<, <=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range predicates (BETWEEN, IN, LIKE, ...) plus ISNULL/NOTNULL and IS."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Wrap a parsed range predicate in NOT; dialects may override the shape."""
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / JSON ... / literal."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        # SQL/JSON predicate: IS [NOT] JSON [VALUE|ARRAY|...] [WITH|WITHOUT UNIQUE [KEYS]]
        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                # Not an IS predicate after all; give back the IS (and NOT) tokens
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right side of IN: UNNEST(...), a (sub)query/value list, or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single query operand becomes IN (<subquery>) rather than a value list
            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `BETWEEN <low> AND <high>` (the BETWEEN token is already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix after LIKE-style predicates."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing toward INTERVAL '<n>' <unit> form."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # Bail out on e.g. `interval IS ...` where "interval" was actually a column name
        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level binary operators, string concat (||), ?? and shifts."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # a ?? b is parsed as COALESCE(a, b)
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level binary operators (+, -, COLLATE, ...)."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level binary operators (*, /, DIV, ...)."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word-operator like DIV with no right operand was actually an alias/identifier
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level operators for dialects that support them."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse prefix unary operators, falling through to typed/AT TIME ZONE parsing."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a `<type> '<literal>'` cast, or fall back to a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one type parameter (e.g. the 38 in DECIMAL(38, 0)) with an optional suffix."""
        this = self._parse_type()
        if not this:
            return None

        # A bare unqualified column here is really a keyword-ish size (e.g. MAX), keep it as a Var
        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested/parameterized forms; None (after retreating)
        when the upcoming tokens don't form a type."""
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier to see whether it is actually a type keyword
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                # ClickHouse Nullable(T) is flattened onto the inner type
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(<func>, <types...>)
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracketed nested types, e.g. ARRAY<INT> or STRUCT<a INT>
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Inline constructor values, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        # When the "type" could equally be a function call (e.g. DATE('...')), peek for a
        # string argument; if none follows, it wasn't a type after all
        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4945 ) 4946 ): 4947 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4948 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4949 self._retreat(index) 4950 break 4951 4952 this = exp.DataType( 4953 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4954 ) 4955 self._match(TokenType.R_BRACKET) 4956 4957 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4958 converter = self.TYPE_CONVERTERS.get(this.this) 4959 if converter: 4960 this = converter(t.cast(exp.DataType, this)) 4961 4962 return this 4963 4964 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4965 index = self._index 4966 4967 if ( 4968 self._curr 4969 and self._next 4970 and self._curr.token_type in self.TYPE_TOKENS 4971 and self._next.token_type in self.TYPE_TOKENS 4972 ): 4973 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4974 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            # A type was required but we didn't get one -- reparse the whole thing as a type
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an AtTimeZone node if an AT TIME ZONE clause follows; otherwise return it unchanged."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference followed by any column operators (dots, casts, brackets)."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            # Oracle-style (+) outer-join marker attached to the column
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a bare column reference, wrapping identifiers into exp.Column nodes."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by "(" is treated as a plain identifier here
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as 
json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5035 path = self._parse_column_ops( 5036 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5037 ) 5038 5039 # The cast :: operator has a lower precedence than the extraction operator :, so 5040 # we rearrange the AST appropriately to avoid casting the JSON path 5041 while isinstance(path, exp.Cast): 5042 casts.append(path.to) 5043 path = path.this 5044 5045 if casts: 5046 dcolon_offset = next( 5047 i 5048 for i, t in enumerate(self._tokens[start_index:]) 5049 if t.token_type == TokenType.DCOLON 5050 ) 5051 end_token = self._tokens[start_index + dcolon_offset - 1] 5052 else: 5053 end_token = self._prev 5054 5055 if path: 5056 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5057 # it'll roundtrip to a string literal in GET_PATH 5058 if isinstance(path, exp.Identifier) and path.quoted: 5059 escape = True 5060 5061 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5062 5063 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5064 # Databricks transforms it back to the colon/dot notation 5065 if json_path: 5066 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5067 5068 if json_path_expr: 5069 json_path_expr.set("escape", escape) 5070 5071 this = self.expression( 5072 exp.JSONExtract, 5073 this=this, 5074 expression=json_path_expr, 5075 variant_extract=True, 5076 ) 5077 5078 while casts: 5079 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5080 5081 return this 5082 5083 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5084 return self._parse_types() 5085 5086 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5087 this = self._parse_bracket(this) 5088 5089 while self._match_set(self.COLUMN_OPERATORS): 5090 op_token = self._prev.token_type 5091 op = 
self.COLUMN_OPERATORS.get(op_token) 5092 5093 if op_token == TokenType.DCOLON: 5094 field = self._parse_dcolon() 5095 if not field: 5096 self.raise_error("Expected type") 5097 elif op and self._curr: 5098 field = self._parse_column_reference() or self._parse_bracket() 5099 else: 5100 field = self._parse_field(any_token=True, anonymous_func=True) 5101 5102 if isinstance(field, exp.Func) and this: 5103 # bigquery allows function calls like x.y.count(...) 5104 # SAFE.SUBSTR(...) 5105 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5106 this = exp.replace_tree( 5107 this, 5108 lambda n: ( 5109 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5110 if n.table 5111 else n.this 5112 ) 5113 if isinstance(n, exp.Column) 5114 else n, 5115 ) 5116 5117 if op: 5118 this = op(self, this, field) 5119 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5120 this = self.expression( 5121 exp.Column, 5122 comments=this.comments, 5123 this=field, 5124 table=this.this, 5125 db=this.args.get("table"), 5126 catalog=this.args.get("db"), 5127 ) 5128 else: 5129 this = self.expression(exp.Dot, this=this, expression=field) 5130 5131 this = self._parse_bracket(this) 5132 5133 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5134 5135 def _parse_primary(self) -> t.Optional[exp.Expression]: 5136 if self._match_set(self.PRIMARY_PARSERS): 5137 token_type = self._prev.token_type 5138 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5139 5140 if token_type == TokenType.STRING: 5141 expressions = [primary] 5142 while self._match(TokenType.STRING): 5143 expressions.append(exp.Literal.string(self._prev.text)) 5144 5145 if len(expressions) > 1: 5146 return self.expression(exp.Concat, expressions=expressions) 5147 5148 return primary 5149 5150 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5151 return exp.Literal.number(f"0.{self._prev.text}") 5152 
5153 if self._match(TokenType.L_PAREN): 5154 comments = self._prev_comments 5155 query = self._parse_select() 5156 5157 if query: 5158 expressions = [query] 5159 else: 5160 expressions = self._parse_expressions() 5161 5162 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5163 5164 if not this and self._match(TokenType.R_PAREN, advance=False): 5165 this = self.expression(exp.Tuple) 5166 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5167 this = self._parse_subquery(this=this, parse_alias=False) 5168 elif isinstance(this, exp.Subquery): 5169 this = self._parse_subquery( 5170 this=self._parse_set_operations(this), parse_alias=False 5171 ) 5172 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5173 this = self.expression(exp.Tuple, expressions=expressions) 5174 else: 5175 this = self.expression(exp.Paren, this=this) 5176 5177 if this: 5178 this.add_comments(comments) 5179 5180 self._match_r_paren(expression=this) 5181 return this 5182 5183 return None 5184 5185 def _parse_field( 5186 self, 5187 any_token: bool = False, 5188 tokens: t.Optional[t.Collection[TokenType]] = None, 5189 anonymous_func: bool = False, 5190 ) -> t.Optional[exp.Expression]: 5191 if anonymous_func: 5192 field = ( 5193 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5194 or self._parse_primary() 5195 ) 5196 else: 5197 field = self._parse_primary() or self._parse_function( 5198 anonymous=anonymous_func, any_token=any_token 5199 ) 5200 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5201 5202 def _parse_function( 5203 self, 5204 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5205 anonymous: bool = False, 5206 optional_parens: bool = True, 5207 any_token: bool = False, 5208 ) -> t.Optional[exp.Expression]: 5209 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5210 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5211 fn_syntax = False 5212 if ( 5213 
self._match(TokenType.L_BRACE, advance=False) 5214 and self._next 5215 and self._next.text.upper() == "FN" 5216 ): 5217 self._advance(2) 5218 fn_syntax = True 5219 5220 func = self._parse_function_call( 5221 functions=functions, 5222 anonymous=anonymous, 5223 optional_parens=optional_parens, 5224 any_token=any_token, 5225 ) 5226 5227 if fn_syntax: 5228 self._match(TokenType.R_BRACE) 5229 5230 return func 5231 5232 def _parse_function_call( 5233 self, 5234 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5235 anonymous: bool = False, 5236 optional_parens: bool = True, 5237 any_token: bool = False, 5238 ) -> t.Optional[exp.Expression]: 5239 if not self._curr: 5240 return None 5241 5242 comments = self._curr.comments 5243 token_type = self._curr.token_type 5244 this = self._curr.text 5245 upper = this.upper() 5246 5247 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5248 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5249 self._advance() 5250 return self._parse_window(parser(self)) 5251 5252 if not self._next or self._next.token_type != TokenType.L_PAREN: 5253 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5254 self._advance() 5255 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5256 5257 return None 5258 5259 if any_token: 5260 if token_type in self.RESERVED_TOKENS: 5261 return None 5262 elif token_type not in self.FUNC_TOKENS: 5263 return None 5264 5265 self._advance(2) 5266 5267 parser = self.FUNCTION_PARSERS.get(upper) 5268 if parser and not anonymous: 5269 this = parser(self) 5270 else: 5271 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5272 5273 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5274 this = self.expression( 5275 subquery_predicate, comments=comments, this=self._parse_select() 5276 ) 5277 self._match_r_paren() 5278 return this 5279 5280 if functions is None: 5281 functions = self.FUNCTIONS 5282 5283 function = 
functions.get(upper) 5284 5285 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5286 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5287 5288 if alias: 5289 args = self._kv_to_prop_eq(args) 5290 5291 if function and not anonymous: 5292 if "dialect" in function.__code__.co_varnames: 5293 func = function(args, dialect=self.dialect) 5294 else: 5295 func = function(args) 5296 5297 func = self.validate_expression(func, args) 5298 if not self.dialect.NORMALIZE_FUNCTIONS: 5299 func.meta["name"] = this 5300 5301 this = func 5302 else: 5303 if token_type == TokenType.IDENTIFIER: 5304 this = exp.Identifier(this=this, quoted=True) 5305 this = self.expression(exp.Anonymous, this=this, expressions=args) 5306 5307 if isinstance(this, exp.Expression): 5308 this.add_comments(comments) 5309 5310 self._match_r_paren(this) 5311 return self._parse_window(this) 5312 5313 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5314 return expression 5315 5316 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5317 transformed = [] 5318 5319 for index, e in enumerate(expressions): 5320 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5321 if isinstance(e, exp.Alias): 5322 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5323 5324 if not isinstance(e, exp.PropertyEQ): 5325 e = self.expression( 5326 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5327 ) 5328 5329 if isinstance(e.this, exp.Column): 5330 e.this.replace(e.this.this) 5331 else: 5332 e = self._to_prop_eq(e, index) 5333 5334 transformed.append(e) 5335 5336 return transformed 5337 5338 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5339 return self._parse_column_def(self._parse_id_var()) 5340 5341 def _parse_user_defined_function( 5342 self, kind: t.Optional[TokenType] = None 5343 ) -> t.Optional[exp.Expression]: 5344 this = self._parse_id_var() 5345 5346 while 
self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a character-set introducer (e.g. _utf8'abc'); falls back to a plain identifier when no literal follows."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            # The part before the dot becomes the parameter's kind/scope
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda parameter (an identifier by default; dialects may override)."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (e.g. `(x, y) -> x + y`), falling back to a regular expression if no lambda arrow follows."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            # Speculatively parse a parenthesized argument list; we retreat below if
            # this doesn't turn out to be a lambda after all
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda -- rewind and parse as an ordinary expression instead
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5407 ) 5408 5409 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5410 index = self._index 5411 if not self._match(TokenType.L_PAREN): 5412 return this 5413 5414 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5415 # expr can be of both types 5416 if self._match_set(self.SELECT_START_TOKENS): 5417 self._retreat(index) 5418 return this 5419 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5420 self._match_r_paren() 5421 return self.expression(exp.Schema, this=this, expressions=args) 5422 5423 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5424 return self._parse_column_def(self._parse_field(any_token=True)) 5425 5426 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5427 # column defs are not really columns, they're identifiers 5428 if isinstance(this, exp.Column): 5429 this = this.this 5430 5431 kind = self._parse_types(schema=True) 5432 5433 if self._match_text_seq("FOR", "ORDINALITY"): 5434 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5435 5436 constraints: t.List[exp.Expression] = [] 5437 5438 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5439 ("ALIAS", "MATERIALIZED") 5440 ): 5441 persisted = self._prev.text.upper() == "MATERIALIZED" 5442 constraint_kind = exp.ComputedColumnConstraint( 5443 this=self._parse_assignment(), 5444 persisted=persisted or self._match_text_seq("PERSISTED"), 5445 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5446 ) 5447 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5448 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5449 self._match(TokenType.ALIAS) 5450 constraints.append( 5451 self.expression( 5452 exp.ColumnConstraint, 5453 
kind=exp.TransformColumnConstraint(this=self._parse_field()),
                )
            )

        # Collect any remaining column constraints (NOT NULL, DEFAULT, etc.)
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT options; returns a GeneratedAsIdentity constraint when both a start and an increment are given."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            # Wrapped form: AUTOINCREMENT(start, increment)
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            # Keyword form: START <expr> INCREMENT <expr>
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; retreats and returns None if REFRESH doesn't follow AUTO."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, with either a wrapped list of values or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        # GENERATED BY DEFAULT [ON NULL] vs GENERATED ALWAYS
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5513 ) 5514 else: 5515 self._match_text_seq("ALWAYS") 5516 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5517 5518 self._match(TokenType.ALIAS) 5519 5520 if self._match_text_seq("ROW"): 5521 start = self._match_text_seq("START") 5522 if not start: 5523 self._match(TokenType.END) 5524 hidden = self._match_text_seq("HIDDEN") 5525 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5526 5527 identity = self._match_text_seq("IDENTITY") 5528 5529 if self._match(TokenType.L_PAREN): 5530 if self._match(TokenType.START_WITH): 5531 this.set("start", self._parse_bitwise()) 5532 if self._match_text_seq("INCREMENT", "BY"): 5533 this.set("increment", self._parse_bitwise()) 5534 if self._match_text_seq("MINVALUE"): 5535 this.set("minvalue", self._parse_bitwise()) 5536 if self._match_text_seq("MAXVALUE"): 5537 this.set("maxvalue", self._parse_bitwise()) 5538 5539 if self._match_text_seq("CYCLE"): 5540 this.set("cycle", True) 5541 elif self._match_text_seq("NO", "CYCLE"): 5542 this.set("cycle", False) 5543 5544 if not identity: 5545 this.set("expression", self._parse_range()) 5546 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5547 args = self._parse_csv(self._parse_bitwise) 5548 this.set("start", seq_get(args, 0)) 5549 this.set("increment", seq_get(args, 1)) 5550 5551 self._match_r_paren() 5552 5553 return this 5554 5555 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5556 self._match_text_seq("LENGTH") 5557 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5558 5559 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5560 if self._match_text_seq("NULL"): 5561 return self.expression(exp.NotNullColumnConstraint) 5562 if self._match_text_seq("CASESPECIFIC"): 5563 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5564 if 
self._match_text_seq("FOR", "REPLICATION"): 5565 return self.expression(exp.NotForReplicationColumnConstraint) 5566 5567 # Unconsume the `NOT` token 5568 self._retreat(self._index - 1) 5569 return None 5570 5571 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5572 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5573 5574 procedure_option_follows = ( 5575 self._match(TokenType.WITH, advance=False) 5576 and self._next 5577 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5578 ) 5579 5580 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5581 return self.expression( 5582 exp.ColumnConstraint, 5583 this=this, 5584 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5585 ) 5586 5587 return this 5588 5589 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5590 if not self._match(TokenType.CONSTRAINT): 5591 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5592 5593 return self.expression( 5594 exp.Constraint, 5595 this=self._parse_id_var(), 5596 expressions=self._parse_unnamed_constraints(), 5597 ) 5598 5599 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5600 constraints = [] 5601 while True: 5602 constraint = self._parse_unnamed_constraint() or self._parse_function() 5603 if not constraint: 5604 break 5605 constraints.append(constraint) 5606 5607 return constraints 5608 5609 def _parse_unnamed_constraint( 5610 self, constraints: t.Optional[t.Collection[str]] = None 5611 ) -> t.Optional[exp.Expression]: 5612 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5613 constraints or self.CONSTRAINT_PARSERS 5614 ): 5615 return None 5616 5617 constraint = self._prev.text.upper() 5618 if constraint not in self.CONSTRAINT_PARSERS: 5619 self.raise_error(f"No parser found for schema constraint {constraint}.") 5620 5621 return self.CONSTRAINT_PARSERS[constraint](self) 5622 5623 def _parse_unique_key(self) -> 
t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint, including NULLS NOT DISTINCT, USING <index_type> and ON CONFLICT."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint options (e.g. "ON DELETE CASCADE") as raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The word after ON (e.g. DELETE/UPDATE) is captured verbatim
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES <table> [options] clause; if `match`, require the REFERENCES keyword."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): expressions is never populated in this implementation --
        # presumably kept so dialect overrides can fill in a referenced-column list; confirm
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint: column list, REFERENCES clause and ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while 
self._match(TokenType.ON): 5685 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5686 self.raise_error("Expected DELETE or UPDATE") 5687 5688 kind = self._prev.text.lower() 5689 5690 if self._match_text_seq("NO", "ACTION"): 5691 action = "NO ACTION" 5692 elif self._match(TokenType.SET): 5693 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5694 action = "SET " + self._prev.text.upper() 5695 else: 5696 self._advance() 5697 action = self._prev.text.upper() 5698 5699 options[kind] = action 5700 5701 return self.expression( 5702 exp.ForeignKey, 5703 expressions=expressions, 5704 reference=reference, 5705 **options, # type: ignore 5706 ) 5707 5708 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5709 return self._parse_field() 5710 5711 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5712 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5713 self._retreat(self._index - 1) 5714 return None 5715 5716 id_vars = self._parse_wrapped_id_vars() 5717 return self.expression( 5718 exp.PeriodForSystemTimeConstraint, 5719 this=seq_get(id_vars, 0), 5720 expression=seq_get(id_vars, 1), 5721 ) 5722 5723 def _parse_primary_key( 5724 self, wrapped_optional: bool = False, in_props: bool = False 5725 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5726 desc = ( 5727 self._match_set((TokenType.ASC, TokenType.DESC)) 5728 and self._prev.token_type == TokenType.DESC 5729 ) 5730 5731 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5732 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5733 5734 expressions = self._parse_wrapped_csv( 5735 self._parse_primary_key_part, optional=wrapped_optional 5736 ) 5737 options = self._parse_key_constraint_options() 5738 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5739 5740 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5741 return 
self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5742 5743 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5744 """ 5745 Parses a datetime column in ODBC format. We parse the column into the corresponding 5746 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5747 same as we did for `DATE('yyyy-mm-dd')`. 5748 5749 Reference: 5750 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5751 """ 5752 self._match(TokenType.VAR) 5753 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5754 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5755 if not self._match(TokenType.R_BRACE): 5756 self.raise_error("Expected }") 5757 return expression 5758 5759 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5760 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5761 return this 5762 5763 bracket_kind = self._prev.token_type 5764 if ( 5765 bracket_kind == TokenType.L_BRACE 5766 and self._curr 5767 and self._curr.token_type == TokenType.VAR 5768 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5769 ): 5770 return self._parse_odbc_datetime_literal() 5771 5772 expressions = self._parse_csv( 5773 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5774 ) 5775 5776 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5777 self.raise_error("Expected ]") 5778 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5779 self.raise_error("Expected }") 5780 5781 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5782 if bracket_kind == TokenType.L_BRACE: 5783 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5784 elif not this: 5785 this = build_array_constructor( 5786 exp.Array, args=expressions, bracket_kind=bracket_kind, 
dialect=self.dialect 5787 ) 5788 else: 5789 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5790 if constructor_type: 5791 return build_array_constructor( 5792 constructor_type, 5793 args=expressions, 5794 bracket_kind=bracket_kind, 5795 dialect=self.dialect, 5796 ) 5797 5798 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5799 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5800 5801 self._add_comments(this) 5802 return self._parse_bracket(this) 5803 5804 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5805 if self._match(TokenType.COLON): 5806 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5807 return this 5808 5809 def _parse_case(self) -> t.Optional[exp.Expression]: 5810 ifs = [] 5811 default = None 5812 5813 comments = self._prev_comments 5814 expression = self._parse_assignment() 5815 5816 while self._match(TokenType.WHEN): 5817 this = self._parse_assignment() 5818 self._match(TokenType.THEN) 5819 then = self._parse_assignment() 5820 ifs.append(self.expression(exp.If, this=this, true=then)) 5821 5822 if self._match(TokenType.ELSE): 5823 default = self._parse_assignment() 5824 5825 if not self._match(TokenType.END): 5826 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5827 default = exp.column("interval") 5828 else: 5829 self.raise_error("Expected END after CASE", self._prev) 5830 5831 return self.expression( 5832 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5833 ) 5834 5835 def _parse_if(self) -> t.Optional[exp.Expression]: 5836 if self._match(TokenType.L_PAREN): 5837 args = self._parse_csv(self._parse_assignment) 5838 this = self.validate_expression(exp.If.from_arg_list(args), args) 5839 self._match_r_paren() 5840 else: 5841 index = self._index - 1 5842 5843 if self.NO_PAREN_IF_COMMANDS and index == 0: 5844 return self._parse_as_command(self._prev) 5845 
5846 condition = self._parse_assignment() 5847 5848 if not condition: 5849 self._retreat(index) 5850 return None 5851 5852 self._match(TokenType.THEN) 5853 true = self._parse_assignment() 5854 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5855 self._match(TokenType.END) 5856 this = self.expression(exp.If, this=condition, true=true, false=false) 5857 5858 return this 5859 5860 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5861 if not self._match_text_seq("VALUE", "FOR"): 5862 self._retreat(self._index - 1) 5863 return None 5864 5865 return self.expression( 5866 exp.NextValueFor, 5867 this=self._parse_column(), 5868 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5869 ) 5870 5871 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5872 this = self._parse_function() or self._parse_var_or_string(upper=True) 5873 5874 if self._match(TokenType.FROM): 5875 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5876 5877 if not self._match(TokenType.COMMA): 5878 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5879 5880 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5881 5882 def _parse_gap_fill(self) -> exp.GapFill: 5883 self._match(TokenType.TABLE) 5884 this = self._parse_table() 5885 5886 self._match(TokenType.COMMA) 5887 args = [this, *self._parse_csv(self._parse_lambda)] 5888 5889 gap_fill = exp.GapFill.from_arg_list(args) 5890 return self.validate_expression(gap_fill, args) 5891 5892 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5893 this = self._parse_assignment() 5894 5895 if not self._match(TokenType.ALIAS): 5896 if self._match(TokenType.COMMA): 5897 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5898 5899 self.raise_error("Expected AS after CAST") 5900 5901 fmt = None 5902 to = self._parse_types() 5903 5904 if 
self._match(TokenType.FORMAT): 5905 fmt_string = self._parse_string() 5906 fmt = self._parse_at_time_zone(fmt_string) 5907 5908 if not to: 5909 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5910 if to.this in exp.DataType.TEMPORAL_TYPES: 5911 this = self.expression( 5912 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5913 this=this, 5914 format=exp.Literal.string( 5915 format_time( 5916 fmt_string.this if fmt_string else "", 5917 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5918 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5919 ) 5920 ), 5921 safe=safe, 5922 ) 5923 5924 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5925 this.set("zone", fmt.args["zone"]) 5926 return this 5927 elif not to: 5928 self.raise_error("Expected TYPE after CAST") 5929 elif isinstance(to, exp.Identifier): 5930 to = exp.DataType.build(to.name, udt=True) 5931 elif to.this == exp.DataType.Type.CHAR: 5932 if self._match(TokenType.CHARACTER_SET): 5933 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5934 5935 return self.expression( 5936 exp.Cast if strict else exp.TryCast, 5937 this=this, 5938 to=to, 5939 format=fmt, 5940 safe=safe, 5941 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5942 ) 5943 5944 def _parse_string_agg(self) -> exp.Expression: 5945 if self._match(TokenType.DISTINCT): 5946 args: t.List[t.Optional[exp.Expression]] = [ 5947 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5948 ] 5949 if self._match(TokenType.COMMA): 5950 args.extend(self._parse_csv(self._parse_assignment)) 5951 else: 5952 args = self._parse_csv(self._parse_assignment) # type: ignore 5953 5954 index = self._index 5955 if not self._match(TokenType.R_PAREN) and args: 5956 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5957 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, 
... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """
        Parse the argument list of CONVERT into a Cast/TryCast node.

        Handles both `CONVERT(expr USING charset)` and `CONVERT(expr, type)`;
        if neither USING nor a comma follows, the target type is left empty.
        """
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            # CONVERT(expr USING charset) -- the target is a character set, not a type
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        # Fewer than three arguments can only be the (bin, charset) variant
        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk the (search, result) pairs; a trailing unpaired argument is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search values must be compared with IS NULL, not equality
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR on both sides being NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # Parses `[KEY] <key> <sep> [VALUE] <value>` (e.g. inside JSON_OBJECT arguments)
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` in FormatJson when a trailing FORMAT JSON clause is present; no-op otherwise
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            # Not a DEFAULT ... ON clause after all; undo the tokens we consumed
            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
    def _parse_json_object(self, agg=False):
        """Parse the argument list of JSON_OBJECT / JSON_OBJECTAGG (after the opening paren)."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # Optional [WITH | WITHOUT] UNIQUE [KEYS] clause
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            # NESTED [PATH] '<path>' COLUMNS (...) -- carries no column name/type of its own
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        # COLUMNS (<json_column_def> [, ...])
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        # JSON_TABLE(<doc> [, '<path>'] [... ON ERROR] [... ON EMPTY] COLUMNS (...))
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        # MATCH (col, ...) AGAINST ('expr' [modifier]) full-text predicate
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # <name> <type> ['<path>'] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON,
this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """
        Parse POSITION/LOCATE-style arguments into a StrPosition node.

        `haystack_first` controls the comma-form argument order; the
        `POSITION(needle IN haystack)` form is handled explicitly.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        # PREDICT(MODEL <model>, TABLE <table> [, <params_struct>])
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # The hint name is upper-cased for normalization
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                if len(args) == 1:
                    # FOR without a FROM position: start defaults to 1
                    args.append(exp.Literal.number(1))
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        # Optional LEADING/TRAILING/BOTH-style prefix (dialect TRIM_TYPES)
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM str) puts the pattern first; TRIM_PATTERN_FIRST dialects
            # use the same order in the comma form
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW <name> AS (<spec>) [, ...]
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Wraps `this` when an IGNORE/RESPECT NULLS clause follows; no-op otherwise
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # HAVING MAX/MIN <column> qualifier on an aggregate
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """
        Parse the window-function suffix of `this`: FILTER (...), WITHIN GROUP (...),
        IGNORE/RESPECT NULLS and OVER (<spec>) / OVER <name>. With `alias=True`, a named
        window definition (`<name> AS (<spec>)`) is parsed instead of an OVER clause.
        """
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper from inside the aggregate to around it
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> -- a reference to a named window
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS/RANGE [BETWEEN] <start spec> AND <end spec>
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One side of a frame spec: UNBOUNDED | CURRENT ROW | <expr>, plus an optional side keyword
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # With `explicit=True`, only `AS <alias>` is accepted (a bare identifier is not)
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multiple aliases: expr AS (a, b, ...)
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression =
self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        # Dispatch to the registered parser for the matched string token kind
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # A string literal used where an identifier is expected, kept quoted
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consume the current token unless it is reserved; returns the consumed token
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The parser declined to produce a placeholder; give the token back
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        # Parses `<keyword> (<expr>, ...)` or `<keyword> <expr>` for any of the given keywords
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        # Parse a `sep`-separated list, skipping items that parse to None
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold over the operator tokens listed in `expressions`
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Parse `( <x> )`; with optional=True the parentheses may be absent
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # The SELECT part of e.g. CREATE TABLE ... AS SELECT ...
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if
self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Collect transaction modes: space-joined words, comma-separated mode list
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND [NO] CHAIN
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        # ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST | AFTER <col>]
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            # Inside ALTER TABLE, a bare DROP defaults to dropping a column
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        # The ADD keyword was already consumed by the caller; remember where it was
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fall through: ALTER COLUMN ... [SET DATA] [TYPE] <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        # The DROP keyword was already consumed by the caller; remember where it was
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        # RENAME COLUMN <old> TO <new> vs RENAME TO <table>
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or
self._match_text_seq("TAGS"): 6847 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6848 else: 6849 if self._match_text_seq("SERDE"): 6850 alter_set.set("serde", self._parse_field()) 6851 6852 alter_set.set("expressions", [self._parse_properties()]) 6853 6854 return alter_set 6855 6856 def _parse_alter(self) -> exp.Alter | exp.Command: 6857 start = self._prev 6858 6859 alter_token = self._match_set(self.ALTERABLES) and self._prev 6860 if not alter_token: 6861 return self._parse_as_command(start) 6862 6863 exists = self._parse_exists() 6864 only = self._match_text_seq("ONLY") 6865 this = self._parse_table(schema=True) 6866 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6867 6868 if self._next: 6869 self._advance() 6870 6871 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6872 if parser: 6873 actions = ensure_list(parser(self)) 6874 not_valid = self._match_text_seq("NOT", "VALID") 6875 options = self._parse_csv(self._parse_property) 6876 6877 if not self._curr and actions: 6878 return self.expression( 6879 exp.Alter, 6880 this=this, 6881 kind=alter_token.text.upper(), 6882 exists=exists, 6883 actions=actions, 6884 only=only, 6885 options=options, 6886 cluster=cluster, 6887 not_valid=not_valid, 6888 ) 6889 6890 return self._parse_as_command(start) 6891 6892 def _parse_merge(self) -> exp.Merge: 6893 self._match(TokenType.INTO) 6894 target = self._parse_table() 6895 6896 if target and self._match(TokenType.ALIAS, advance=False): 6897 target.set("alias", self._parse_table_alias()) 6898 6899 self._match(TokenType.USING) 6900 using = self._parse_table() 6901 6902 self._match(TokenType.ON) 6903 on = self._parse_assignment() 6904 6905 return self.expression( 6906 exp.Merge, 6907 this=target, 6908 using=using, 6909 on=on, 6910 expressions=self._parse_when_matched(), 6911 returning=self._parse_returning(), 6912 ) 6913 6914 def _parse_when_matched(self) -> t.List[exp.When]: 6915 whens = [] 6916 6917 while 
self._match(TokenType.WHEN): 6918 matched = not self._match(TokenType.NOT) 6919 self._match_text_seq("MATCHED") 6920 source = ( 6921 False 6922 if self._match_text_seq("BY", "TARGET") 6923 else self._match_text_seq("BY", "SOURCE") 6924 ) 6925 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6926 6927 self._match(TokenType.THEN) 6928 6929 if self._match(TokenType.INSERT): 6930 this = self._parse_star() 6931 if this: 6932 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6933 else: 6934 then = self.expression( 6935 exp.Insert, 6936 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6937 expression=self._match_text_seq("VALUES") and self._parse_value(), 6938 ) 6939 elif self._match(TokenType.UPDATE): 6940 expressions = self._parse_star() 6941 if expressions: 6942 then = self.expression(exp.Update, expressions=expressions) 6943 else: 6944 then = self.expression( 6945 exp.Update, 6946 expressions=self._match(TokenType.SET) 6947 and self._parse_csv(self._parse_equality), 6948 ) 6949 elif self._match(TokenType.DELETE): 6950 then = self.expression(exp.Var, this=self._prev.text) 6951 else: 6952 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6953 6954 whens.append( 6955 self.expression( 6956 exp.When, 6957 matched=matched, 6958 source=source, 6959 condition=condition, 6960 then=then, 6961 ) 6962 ) 6963 return whens 6964 6965 def _parse_show(self) -> t.Optional[exp.Expression]: 6966 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6967 if parser: 6968 return parser(self) 6969 return self._parse_as_command(self._prev) 6970 6971 def _parse_set_item_assignment( 6972 self, kind: t.Optional[str] = None 6973 ) -> t.Optional[exp.Expression]: 6974 index = self._index 6975 6976 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6977 return self._parse_set_transaction(global_=kind == "GLOBAL") 6978 6979 left = self._parse_primary() or self._parse_column() 6980 
assignment_delimiter = self._match_texts(("=", "TO")) 6981 6982 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6983 self._retreat(index) 6984 return None 6985 6986 right = self._parse_statement() or self._parse_id_var() 6987 if isinstance(right, (exp.Column, exp.Identifier)): 6988 right = exp.var(right.name) 6989 6990 this = self.expression(exp.EQ, this=left, expression=right) 6991 return self.expression(exp.SetItem, this=this, kind=kind) 6992 6993 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6994 self._match_text_seq("TRANSACTION") 6995 characteristics = self._parse_csv( 6996 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6997 ) 6998 return self.expression( 6999 exp.SetItem, 7000 expressions=characteristics, 7001 kind="TRANSACTION", 7002 **{"global": global_}, # type: ignore 7003 ) 7004 7005 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7006 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7007 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7008 7009 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7010 index = self._index 7011 set_ = self.expression( 7012 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7013 ) 7014 7015 if self._curr: 7016 self._retreat(index) 7017 return self._parse_as_command(self._prev) 7018 7019 return set_ 7020 7021 def _parse_var_from_options( 7022 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7023 ) -> t.Optional[exp.Var]: 7024 start = self._curr 7025 if not start: 7026 return None 7027 7028 option = start.text.upper() 7029 continuations = options.get(option) 7030 7031 index = self._index 7032 self._advance() 7033 for keywords in continuations or []: 7034 if isinstance(keywords, str): 7035 keywords = (keywords,) 7036 7037 if self._match_text_seq(*keywords): 7038 option = f"{option} {' '.join(keywords)}" 7039 break 
7040 else: 7041 if continuations or continuations is None: 7042 if raise_unmatched: 7043 self.raise_error(f"Unknown option {option}") 7044 7045 self._retreat(index) 7046 return None 7047 7048 return exp.var(option) 7049 7050 def _parse_as_command(self, start: Token) -> exp.Command: 7051 while self._curr: 7052 self._advance() 7053 text = self._find_sql(start, self._prev) 7054 size = len(start.text) 7055 self._warn_unsupported() 7056 return exp.Command(this=text[:size], expression=text[size:]) 7057 7058 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7059 settings = [] 7060 7061 self._match_l_paren() 7062 kind = self._parse_id_var() 7063 7064 if self._match(TokenType.L_PAREN): 7065 while True: 7066 key = self._parse_id_var() 7067 value = self._parse_primary() 7068 7069 if not key and value is None: 7070 break 7071 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7072 self._match(TokenType.R_PAREN) 7073 7074 self._match_r_paren() 7075 7076 return self.expression( 7077 exp.DictProperty, 7078 this=this, 7079 kind=kind.this if kind else None, 7080 settings=settings, 7081 ) 7082 7083 def _parse_dict_range(self, this: str) -> exp.DictRange: 7084 self._match_l_paren() 7085 has_min = self._match_text_seq("MIN") 7086 if has_min: 7087 min = self._parse_var() or self._parse_primary() 7088 self._match_text_seq("MAX") 7089 max = self._parse_var() or self._parse_primary() 7090 else: 7091 max = self._parse_var() or self._parse_primary() 7092 min = exp.Literal.number(0) 7093 self._match_r_paren() 7094 return self.expression(exp.DictRange, this=this, min=min, max=max) 7095 7096 def _parse_comprehension( 7097 self, this: t.Optional[exp.Expression] 7098 ) -> t.Optional[exp.Comprehension]: 7099 index = self._index 7100 expression = self._parse_column() 7101 if not self._match(TokenType.IN): 7102 self._retreat(index - 1) 7103 return None 7104 iterator = self._parse_column() 7105 condition = self._parse_assignment() if self._match_text_seq("IF") 
else None 7106 return self.expression( 7107 exp.Comprehension, 7108 this=this, 7109 expression=expression, 7110 iterator=iterator, 7111 condition=condition, 7112 ) 7113 7114 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7115 if self._match(TokenType.HEREDOC_STRING): 7116 return self.expression(exp.Heredoc, this=self._prev.text) 7117 7118 if not self._match_text_seq("$"): 7119 return None 7120 7121 tags = ["$"] 7122 tag_text = None 7123 7124 if self._is_connected(): 7125 self._advance() 7126 tags.append(self._prev.text.upper()) 7127 else: 7128 self.raise_error("No closing $ found") 7129 7130 if tags[-1] != "$": 7131 if self._is_connected() and self._match_text_seq("$"): 7132 tag_text = tags[-1] 7133 tags.append("$") 7134 else: 7135 self.raise_error("No closing $ found") 7136 7137 heredoc_start = self._curr 7138 7139 while self._curr: 7140 if self._match_text_seq(*tags, advance=False): 7141 this = self._find_sql(heredoc_start, self._prev) 7142 self._advance(len(tags)) 7143 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7144 7145 self._advance() 7146 7147 self.raise_error(f"No closing {''.join(tags)} found") 7148 return None 7149 7150 def _find_parser( 7151 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7152 ) -> t.Optional[t.Callable]: 7153 if not self._curr: 7154 return None 7155 7156 index = self._index 7157 this = [] 7158 while True: 7159 # The current token might be multiple words 7160 curr = self._curr.text.upper() 7161 key = curr.split(" ") 7162 this.append(curr) 7163 7164 self._advance() 7165 result, trie = in_trie(trie, key) 7166 if result == TrieResult.FAILED: 7167 break 7168 7169 if result == TrieResult.EXISTS: 7170 subparser = parsers[" ".join(this)] 7171 return subparser 7172 7173 self._retreat(index) 7174 return None 7175 7176 def _match(self, token_type, advance=True, expression=None): 7177 if not self._curr: 7178 return None 7179 7180 if self._curr.token_type == token_type: 7181 if advance: 7182 self._advance() 7183 
self._add_comments(expression) 7184 return True 7185 7186 return None 7187 7188 def _match_set(self, types, advance=True): 7189 if not self._curr: 7190 return None 7191 7192 if self._curr.token_type in types: 7193 if advance: 7194 self._advance() 7195 return True 7196 7197 return None 7198 7199 def _match_pair(self, token_type_a, token_type_b, advance=True): 7200 if not self._curr or not self._next: 7201 return None 7202 7203 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7204 if advance: 7205 self._advance(2) 7206 return True 7207 7208 return None 7209 7210 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7211 if not self._match(TokenType.L_PAREN, expression=expression): 7212 self.raise_error("Expecting (") 7213 7214 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7215 if not self._match(TokenType.R_PAREN, expression=expression): 7216 self.raise_error("Expecting )") 7217 7218 def _match_texts(self, texts, advance=True): 7219 if ( 7220 self._curr 7221 and self._curr.token_type != TokenType.STRING 7222 and self._curr.text.upper() in texts 7223 ): 7224 if advance: 7225 self._advance() 7226 return True 7227 return None 7228 7229 def _match_text_seq(self, *texts, advance=True): 7230 index = self._index 7231 for text in texts: 7232 if ( 7233 self._curr 7234 and self._curr.token_type != TokenType.STRING 7235 and self._curr.text.upper() == text 7236 ): 7237 self._advance() 7238 else: 7239 self._retreat(index) 7240 return None 7241 7242 if not advance: 7243 self._retreat(index) 7244 7245 return True 7246 7247 def _replace_lambda( 7248 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7249 ) -> t.Optional[exp.Expression]: 7250 if not node: 7251 return node 7252 7253 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7254 7255 for column in node.find_all(exp.Column): 7256 typ = lambda_types.get(column.parts[0].name) 7257 if typ 
is not None: 7258 dot_or_id = column.to_dot() if column.table else column.this 7259 7260 if typ: 7261 dot_or_id = self.expression( 7262 exp.Cast, 7263 this=dot_or_id, 7264 to=typ, 7265 ) 7266 7267 parent = column.parent 7268 7269 while isinstance(parent, exp.Dot): 7270 if not isinstance(parent.parent, exp.Dot): 7271 parent.replace(dot_or_id) 7272 break 7273 parent = parent.parent 7274 else: 7275 if column is node: 7276 node = dot_or_id 7277 else: 7278 column.replace(dot_or_id) 7279 return node 7280 7281 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7282 start = self._prev 7283 7284 # Not to be confused with TRUNCATE(number, decimals) function call 7285 if self._match(TokenType.L_PAREN): 7286 self._retreat(self._index - 2) 7287 return self._parse_function() 7288 7289 # Clickhouse supports TRUNCATE DATABASE as well 7290 is_database = self._match(TokenType.DATABASE) 7291 7292 self._match(TokenType.TABLE) 7293 7294 exists = self._parse_exists(not_=False) 7295 7296 expressions = self._parse_csv( 7297 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7298 ) 7299 7300 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7301 7302 if self._match_text_seq("RESTART", "IDENTITY"): 7303 identity = "RESTART" 7304 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7305 identity = "CONTINUE" 7306 else: 7307 identity = None 7308 7309 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7310 option = self._prev.text 7311 else: 7312 option = None 7313 7314 partition = self._parse_partition() 7315 7316 # Fallback case 7317 if self._curr: 7318 return self._parse_as_command(start) 7319 7320 return self.expression( 7321 exp.TruncateTable, 7322 expressions=expressions, 7323 is_database=is_database, 7324 exists=exists, 7325 cluster=cluster, 7326 identity=identity, 7327 option=option, 7328 partition=partition, 7329 ) 7330 7331 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7332 
this = self._parse_ordered(self._parse_opclass) 7333 7334 if not self._match(TokenType.WITH): 7335 return this 7336 7337 op = self._parse_var(any_token=True) 7338 7339 return self.expression(exp.WithOperator, this=this, op=op) 7340 7341 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7342 self._match(TokenType.EQ) 7343 self._match(TokenType.L_PAREN) 7344 7345 opts: t.List[t.Optional[exp.Expression]] = [] 7346 while self._curr and not self._match(TokenType.R_PAREN): 7347 if self._match_text_seq("FORMAT_NAME", "="): 7348 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7349 # so we parse it separately to use _parse_field() 7350 prop = self.expression( 7351 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7352 ) 7353 opts.append(prop) 7354 else: 7355 opts.append(self._parse_property()) 7356 7357 self._match(TokenType.COMMA) 7358 7359 return opts 7360 7361 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7362 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7363 7364 options = [] 7365 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7366 option = self._parse_var(any_token=True) 7367 prev = self._prev.text.upper() 7368 7369 # Different dialects might separate options and values by white space, "=" and "AS" 7370 self._match(TokenType.EQ) 7371 self._match(TokenType.ALIAS) 7372 7373 param = self.expression(exp.CopyParameter, this=option) 7374 7375 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7376 TokenType.L_PAREN, advance=False 7377 ): 7378 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7379 param.set("expressions", self._parse_wrapped_options()) 7380 elif prev == "FILE_FORMAT": 7381 # T-SQL's external file format case 7382 param.set("expression", self._parse_field()) 7383 else: 7384 param.set("expression", self._parse_unquoted_field()) 7385 7386 options.append(param) 7387 self._match(sep) 7388 7389 return options 7390 7391 
    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        # Parses the credential clauses of COPY statements (Snowflake
        # STORAGE_INTEGRATION / CREDENTIALS / ENCRYPTION, Redshift IAM_ROLE / REGION).
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Hook so dialects can customize how COPY file locations are parsed.
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        # COPY [INTO] <target> {FROM | TO} <files> [credentials] [WITH] (<params>)
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is truthy for COPY ... FROM, falsy for COPY ... TO
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        # NORMALIZE(<string> [, <form>])
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        # Handles `* COLUMNS(...)`-style unpacking and the EXCEPT/REPLACE/RENAME
        # modifiers that can follow a star projection.
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        # [ROLE | GROUP] <principal>
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        # GRANT <privileges> ON [<kind>] <securable> TO <principals> [WITH GRANT OPTION]
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        # Leftover tokens: preserve the statement as an opaque command.
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        # OVERLAY(<string> PLACING <string> FROM <pos> [FOR <len>])
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from VAR_MAP-style arguments.

    A single star argument yields a ``StarMap``; otherwise the arguments are
    interpreted as alternating key/value pairs and packed into a ``VarMap``.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Pair up consecutive (key, value) arguments; an odd count raises IndexError,
    # matching the strict pairing contract.
    pairs = [(args[i], args[i + 1]) for i in range(0, len(args), 2)]
    keys = [key for key, _ in pairs]
    values = [value for _, value in pairs]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Create a method-style parser for a binary range expression of *expr_type*.

    The returned callable parses a bitwise right-hand side, combines it with the
    already-parsed left operand (swapping the two when *reverse_args* is true),
    and finally runs the node through ``_parse_escape`` to pick up any trailing
    ESCAPE clause.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        rhs = self._parse_bitwise()
        left, right = (rhs, this) if reverse_args else (this, rhs)
        return self._parse_escape(self.expression(expr_type, this=left, expression=right))

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a LOG/LN node, honoring the dialect's argument order and LN default."""
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if second:
        # Two-argument form: dialects disagree on whether the base comes first.
        if dialect.LOG_BASE_FIRST:
            return exp.Log(this=first, expression=second)
        return exp.Log(this=second, expression=first)

    # Single-argument form: some dialects treat LOG(x) as the natural logarithm.
    func = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
    return func(this=first)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Create a builder for JSON-extraction functions whose second argument is a path.

    The path argument is normalized via the dialect's ``to_json_path`` hook.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )

        # Only JSONExtract accepts additional path arguments beyond the first two.
        if len(args) > 2 and expr_type is exp.JSONExtract:
            node.set("expressions", args[2:])

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a modulo node, parenthesizing binary operands to preserve precedence."""

    def _wrap(operand: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # MOD(a + 1, 7) must render as (a + 1) % 7, not a + 1 % 7.
        if isinstance(operand, exp.Binary):
            return exp.Paren(this=operand)
        return operand

    return exp.Mod(this=_wrap(seq_get(args, 0)), expression=_wrap(seq_get(args, 1)))
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array-constructor node of *exp_class* over *args*.

    For dialects that distinguish ``ARRAY[...]`` from ``ARRAY(...)``, records
    which bracket kind produced the node so it can round-trip faithfully.
    """
    node = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        node.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build a CONVERT_TIMEZONE node.

    The two-argument form ``(target_tz, timestamp)`` fills in *default_source_tz*
    (when provided) as the source timezone; any other arity is delegated to
    ``ConvertTimezone.from_arg_list``.
    """
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
    return exp.ConvertTimezone(
        source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 217 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 218 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 219 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 220 "LIKE": build_like, 221 "LOG": build_logarithm, 222 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 223 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 224 "LOWER": build_lower, 225 "LPAD": lambda args: build_pad(args), 226 "LEFTPAD": lambda args: build_pad(args), 227 "LTRIM": lambda args: build_trim(args), 228 "MOD": build_mod, 229 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 230 "RPAD": lambda args: build_pad(args, is_left=False), 231 "RTRIM": lambda args: build_trim(args, is_left=False), 232 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 233 if len(args) != 2 234 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 235 "TIME_TO_TIME_STR": lambda args: exp.Cast( 236 this=seq_get(args, 0), 237 to=exp.DataType(this=exp.DataType.Type.TEXT), 238 ), 239 "TO_HEX": build_hex, 240 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 241 this=exp.Cast( 242 this=seq_get(args, 0), 243 to=exp.DataType(this=exp.DataType.Type.TEXT), 244 ), 245 start=exp.Literal.number(1), 246 length=exp.Literal.number(10), 247 ), 248 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 249 "UPPER": build_upper, 250 "VAR_MAP": build_var_map, 251 } 252 253 NO_PAREN_FUNCTIONS = { 254 TokenType.CURRENT_DATE: exp.CurrentDate, 255 TokenType.CURRENT_DATETIME: exp.CurrentDate, 256 TokenType.CURRENT_TIME: exp.CurrentTime, 257 
TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 258 TokenType.CURRENT_USER: exp.CurrentUser, 259 } 260 261 STRUCT_TYPE_TOKENS = { 262 TokenType.NESTED, 263 TokenType.OBJECT, 264 TokenType.STRUCT, 265 TokenType.UNION, 266 } 267 268 NESTED_TYPE_TOKENS = { 269 TokenType.ARRAY, 270 TokenType.LIST, 271 TokenType.LOWCARDINALITY, 272 TokenType.MAP, 273 TokenType.NULLABLE, 274 TokenType.RANGE, 275 *STRUCT_TYPE_TOKENS, 276 } 277 278 ENUM_TYPE_TOKENS = { 279 TokenType.ENUM, 280 TokenType.ENUM8, 281 TokenType.ENUM16, 282 } 283 284 AGGREGATE_TYPE_TOKENS = { 285 TokenType.AGGREGATEFUNCTION, 286 TokenType.SIMPLEAGGREGATEFUNCTION, 287 } 288 289 TYPE_TOKENS = { 290 TokenType.BIT, 291 TokenType.BOOLEAN, 292 TokenType.TINYINT, 293 TokenType.UTINYINT, 294 TokenType.SMALLINT, 295 TokenType.USMALLINT, 296 TokenType.INT, 297 TokenType.UINT, 298 TokenType.BIGINT, 299 TokenType.UBIGINT, 300 TokenType.INT128, 301 TokenType.UINT128, 302 TokenType.INT256, 303 TokenType.UINT256, 304 TokenType.MEDIUMINT, 305 TokenType.UMEDIUMINT, 306 TokenType.FIXEDSTRING, 307 TokenType.FLOAT, 308 TokenType.DOUBLE, 309 TokenType.CHAR, 310 TokenType.NCHAR, 311 TokenType.VARCHAR, 312 TokenType.NVARCHAR, 313 TokenType.BPCHAR, 314 TokenType.TEXT, 315 TokenType.MEDIUMTEXT, 316 TokenType.LONGTEXT, 317 TokenType.MEDIUMBLOB, 318 TokenType.LONGBLOB, 319 TokenType.BINARY, 320 TokenType.VARBINARY, 321 TokenType.JSON, 322 TokenType.JSONB, 323 TokenType.INTERVAL, 324 TokenType.TINYBLOB, 325 TokenType.TINYTEXT, 326 TokenType.TIME, 327 TokenType.TIMETZ, 328 TokenType.TIMESTAMP, 329 TokenType.TIMESTAMP_S, 330 TokenType.TIMESTAMP_MS, 331 TokenType.TIMESTAMP_NS, 332 TokenType.TIMESTAMPTZ, 333 TokenType.TIMESTAMPLTZ, 334 TokenType.TIMESTAMPNTZ, 335 TokenType.DATETIME, 336 TokenType.DATETIME64, 337 TokenType.DATE, 338 TokenType.DATE32, 339 TokenType.INT4RANGE, 340 TokenType.INT4MULTIRANGE, 341 TokenType.INT8RANGE, 342 TokenType.INT8MULTIRANGE, 343 TokenType.NUMRANGE, 344 TokenType.NUMMULTIRANGE, 345 TokenType.TSRANGE, 346 
TokenType.TSMULTIRANGE, 347 TokenType.TSTZRANGE, 348 TokenType.TSTZMULTIRANGE, 349 TokenType.DATERANGE, 350 TokenType.DATEMULTIRANGE, 351 TokenType.DECIMAL, 352 TokenType.DECIMAL32, 353 TokenType.DECIMAL64, 354 TokenType.DECIMAL128, 355 TokenType.UDECIMAL, 356 TokenType.BIGDECIMAL, 357 TokenType.UUID, 358 TokenType.GEOGRAPHY, 359 TokenType.GEOMETRY, 360 TokenType.HLLSKETCH, 361 TokenType.HSTORE, 362 TokenType.PSEUDO_TYPE, 363 TokenType.SUPER, 364 TokenType.SERIAL, 365 TokenType.SMALLSERIAL, 366 TokenType.BIGSERIAL, 367 TokenType.XML, 368 TokenType.YEAR, 369 TokenType.UNIQUEIDENTIFIER, 370 TokenType.USERDEFINED, 371 TokenType.MONEY, 372 TokenType.SMALLMONEY, 373 TokenType.ROWVERSION, 374 TokenType.IMAGE, 375 TokenType.VARIANT, 376 TokenType.VECTOR, 377 TokenType.OBJECT, 378 TokenType.OBJECT_IDENTIFIER, 379 TokenType.INET, 380 TokenType.IPADDRESS, 381 TokenType.IPPREFIX, 382 TokenType.IPV4, 383 TokenType.IPV6, 384 TokenType.UNKNOWN, 385 TokenType.NULL, 386 TokenType.NAME, 387 TokenType.TDIGEST, 388 *ENUM_TYPE_TOKENS, 389 *NESTED_TYPE_TOKENS, 390 *AGGREGATE_TYPE_TOKENS, 391 } 392 393 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 394 TokenType.BIGINT: TokenType.UBIGINT, 395 TokenType.INT: TokenType.UINT, 396 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 397 TokenType.SMALLINT: TokenType.USMALLINT, 398 TokenType.TINYINT: TokenType.UTINYINT, 399 TokenType.DECIMAL: TokenType.UDECIMAL, 400 } 401 402 SUBQUERY_PREDICATES = { 403 TokenType.ANY: exp.Any, 404 TokenType.ALL: exp.All, 405 TokenType.EXISTS: exp.Exists, 406 TokenType.SOME: exp.Any, 407 } 408 409 RESERVED_TOKENS = { 410 *Tokenizer.SINGLE_TOKENS.values(), 411 TokenType.SELECT, 412 } - {TokenType.IDENTIFIER} 413 414 DB_CREATABLES = { 415 TokenType.DATABASE, 416 TokenType.DICTIONARY, 417 TokenType.MODEL, 418 TokenType.SCHEMA, 419 TokenType.SEQUENCE, 420 TokenType.STORAGE_INTEGRATION, 421 TokenType.TABLE, 422 TokenType.TAG, 423 TokenType.VIEW, 424 TokenType.WAREHOUSE, 425 TokenType.STREAMLIT, 426 } 427 428 CREATABLES = { 429 
TokenType.COLUMN, 430 TokenType.CONSTRAINT, 431 TokenType.FOREIGN_KEY, 432 TokenType.FUNCTION, 433 TokenType.INDEX, 434 TokenType.PROCEDURE, 435 *DB_CREATABLES, 436 } 437 438 ALTERABLES = { 439 TokenType.INDEX, 440 TokenType.TABLE, 441 TokenType.VIEW, 442 } 443 444 # Tokens that can represent identifiers 445 ID_VAR_TOKENS = { 446 TokenType.ALL, 447 TokenType.VAR, 448 TokenType.ANTI, 449 TokenType.APPLY, 450 TokenType.ASC, 451 TokenType.ASOF, 452 TokenType.AUTO_INCREMENT, 453 TokenType.BEGIN, 454 TokenType.BPCHAR, 455 TokenType.CACHE, 456 TokenType.CASE, 457 TokenType.COLLATE, 458 TokenType.COMMAND, 459 TokenType.COMMENT, 460 TokenType.COMMIT, 461 TokenType.CONSTRAINT, 462 TokenType.COPY, 463 TokenType.CUBE, 464 TokenType.DEFAULT, 465 TokenType.DELETE, 466 TokenType.DESC, 467 TokenType.DESCRIBE, 468 TokenType.DICTIONARY, 469 TokenType.DIV, 470 TokenType.END, 471 TokenType.EXECUTE, 472 TokenType.ESCAPE, 473 TokenType.FALSE, 474 TokenType.FIRST, 475 TokenType.FILTER, 476 TokenType.FINAL, 477 TokenType.FORMAT, 478 TokenType.FULL, 479 TokenType.IDENTIFIER, 480 TokenType.IS, 481 TokenType.ISNULL, 482 TokenType.INTERVAL, 483 TokenType.KEEP, 484 TokenType.KILL, 485 TokenType.LEFT, 486 TokenType.LOAD, 487 TokenType.MERGE, 488 TokenType.NATURAL, 489 TokenType.NEXT, 490 TokenType.OFFSET, 491 TokenType.OPERATOR, 492 TokenType.ORDINALITY, 493 TokenType.OVERLAPS, 494 TokenType.OVERWRITE, 495 TokenType.PARTITION, 496 TokenType.PERCENT, 497 TokenType.PIVOT, 498 TokenType.PRAGMA, 499 TokenType.RANGE, 500 TokenType.RECURSIVE, 501 TokenType.REFERENCES, 502 TokenType.REFRESH, 503 TokenType.RENAME, 504 TokenType.REPLACE, 505 TokenType.RIGHT, 506 TokenType.ROLLUP, 507 TokenType.ROW, 508 TokenType.ROWS, 509 TokenType.SEMI, 510 TokenType.SET, 511 TokenType.SETTINGS, 512 TokenType.SHOW, 513 TokenType.TEMPORARY, 514 TokenType.TOP, 515 TokenType.TRUE, 516 TokenType.TRUNCATE, 517 TokenType.UNIQUE, 518 TokenType.UNNEST, 519 TokenType.UNPIVOT, 520 TokenType.UPDATE, 521 TokenType.USE, 522 
TokenType.VOLATILE, 523 TokenType.WINDOW, 524 *CREATABLES, 525 *SUBQUERY_PREDICATES, 526 *TYPE_TOKENS, 527 *NO_PAREN_FUNCTIONS, 528 } 529 ID_VAR_TOKENS.remove(TokenType.UNION) 530 531 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 532 533 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 534 TokenType.ANTI, 535 TokenType.APPLY, 536 TokenType.ASOF, 537 TokenType.FULL, 538 TokenType.LEFT, 539 TokenType.LOCK, 540 TokenType.NATURAL, 541 TokenType.OFFSET, 542 TokenType.RIGHT, 543 TokenType.SEMI, 544 TokenType.WINDOW, 545 } 546 547 ALIAS_TOKENS = ID_VAR_TOKENS 548 549 ARRAY_CONSTRUCTORS = { 550 "ARRAY": exp.Array, 551 "LIST": exp.List, 552 } 553 554 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 555 556 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 557 558 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 559 560 FUNC_TOKENS = { 561 TokenType.COLLATE, 562 TokenType.COMMAND, 563 TokenType.CURRENT_DATE, 564 TokenType.CURRENT_DATETIME, 565 TokenType.CURRENT_TIMESTAMP, 566 TokenType.CURRENT_TIME, 567 TokenType.CURRENT_USER, 568 TokenType.FILTER, 569 TokenType.FIRST, 570 TokenType.FORMAT, 571 TokenType.GLOB, 572 TokenType.IDENTIFIER, 573 TokenType.INDEX, 574 TokenType.ISNULL, 575 TokenType.ILIKE, 576 TokenType.INSERT, 577 TokenType.LIKE, 578 TokenType.MERGE, 579 TokenType.OFFSET, 580 TokenType.PRIMARY_KEY, 581 TokenType.RANGE, 582 TokenType.REPLACE, 583 TokenType.RLIKE, 584 TokenType.ROW, 585 TokenType.UNNEST, 586 TokenType.VAR, 587 TokenType.LEFT, 588 TokenType.RIGHT, 589 TokenType.SEQUENCE, 590 TokenType.DATE, 591 TokenType.DATETIME, 592 TokenType.TABLE, 593 TokenType.TIMESTAMP, 594 TokenType.TIMESTAMPTZ, 595 TokenType.TRUNCATE, 596 TokenType.WINDOW, 597 TokenType.XOR, 598 *TYPE_TOKENS, 599 *SUBQUERY_PREDICATES, 600 } 601 602 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 603 TokenType.AND: exp.And, 604 } 605 606 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 607 TokenType.COLON_EQ: exp.PropertyEQ, 608 } 609 610 DISJUNCTION: 
t.Dict[TokenType, t.Type[exp.Expression]] = { 611 TokenType.OR: exp.Or, 612 } 613 614 EQUALITY = { 615 TokenType.EQ: exp.EQ, 616 TokenType.NEQ: exp.NEQ, 617 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 618 } 619 620 COMPARISON = { 621 TokenType.GT: exp.GT, 622 TokenType.GTE: exp.GTE, 623 TokenType.LT: exp.LT, 624 TokenType.LTE: exp.LTE, 625 } 626 627 BITWISE = { 628 TokenType.AMP: exp.BitwiseAnd, 629 TokenType.CARET: exp.BitwiseXor, 630 TokenType.PIPE: exp.BitwiseOr, 631 } 632 633 TERM = { 634 TokenType.DASH: exp.Sub, 635 TokenType.PLUS: exp.Add, 636 TokenType.MOD: exp.Mod, 637 TokenType.COLLATE: exp.Collate, 638 } 639 640 FACTOR = { 641 TokenType.DIV: exp.IntDiv, 642 TokenType.LR_ARROW: exp.Distance, 643 TokenType.SLASH: exp.Div, 644 TokenType.STAR: exp.Mul, 645 } 646 647 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 648 649 TIMES = { 650 TokenType.TIME, 651 TokenType.TIMETZ, 652 } 653 654 TIMESTAMPS = { 655 TokenType.TIMESTAMP, 656 TokenType.TIMESTAMPTZ, 657 TokenType.TIMESTAMPLTZ, 658 *TIMES, 659 } 660 661 SET_OPERATIONS = { 662 TokenType.UNION, 663 TokenType.INTERSECT, 664 TokenType.EXCEPT, 665 } 666 667 JOIN_METHODS = { 668 TokenType.ASOF, 669 TokenType.NATURAL, 670 TokenType.POSITIONAL, 671 } 672 673 JOIN_SIDES = { 674 TokenType.LEFT, 675 TokenType.RIGHT, 676 TokenType.FULL, 677 } 678 679 JOIN_KINDS = { 680 TokenType.ANTI, 681 TokenType.CROSS, 682 TokenType.INNER, 683 TokenType.OUTER, 684 TokenType.SEMI, 685 TokenType.STRAIGHT_JOIN, 686 } 687 688 JOIN_HINTS: t.Set[str] = set() 689 690 LAMBDAS = { 691 TokenType.ARROW: lambda self, expressions: self.expression( 692 exp.Lambda, 693 this=self._replace_lambda( 694 self._parse_assignment(), 695 expressions, 696 ), 697 expressions=expressions, 698 ), 699 TokenType.FARROW: lambda self, expressions: self.expression( 700 exp.Kwarg, 701 this=exp.var(expressions[0].name), 702 expression=self._parse_assignment(), 703 ), 704 } 705 706 COLUMN_OPERATORS = { 707 TokenType.DOT: None, 708 TokenType.DCOLON: lambda self, 
this, to: self.expression( 709 exp.Cast if self.STRICT_CAST else exp.TryCast, 710 this=this, 711 to=to, 712 ), 713 TokenType.ARROW: lambda self, this, path: self.expression( 714 exp.JSONExtract, 715 this=this, 716 expression=self.dialect.to_json_path(path), 717 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 718 ), 719 TokenType.DARROW: lambda self, this, path: self.expression( 720 exp.JSONExtractScalar, 721 this=this, 722 expression=self.dialect.to_json_path(path), 723 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 724 ), 725 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 726 exp.JSONBExtract, 727 this=this, 728 expression=path, 729 ), 730 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 731 exp.JSONBExtractScalar, 732 this=this, 733 expression=path, 734 ), 735 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 736 exp.JSONBContains, 737 this=this, 738 expression=key, 739 ), 740 } 741 742 EXPRESSION_PARSERS = { 743 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 744 exp.Column: lambda self: self._parse_column(), 745 exp.Condition: lambda self: self._parse_assignment(), 746 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 747 exp.Expression: lambda self: self._parse_expression(), 748 exp.From: lambda self: self._parse_from(joins=True), 749 exp.Group: lambda self: self._parse_group(), 750 exp.Having: lambda self: self._parse_having(), 751 exp.Identifier: lambda self: self._parse_id_var(), 752 exp.Join: lambda self: self._parse_join(), 753 exp.Lambda: lambda self: self._parse_lambda(), 754 exp.Lateral: lambda self: self._parse_lateral(), 755 exp.Limit: lambda self: self._parse_limit(), 756 exp.Offset: lambda self: self._parse_offset(), 757 exp.Order: lambda self: self._parse_order(), 758 exp.Ordered: lambda self: self._parse_ordered(), 759 exp.Properties: lambda self: self._parse_properties(), 760 exp.Qualify: lambda self: self._parse_qualify(), 
761 exp.Returning: lambda self: self._parse_returning(), 762 exp.Select: lambda self: self._parse_select(), 763 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 764 exp.Table: lambda self: self._parse_table_parts(), 765 exp.TableAlias: lambda self: self._parse_table_alias(), 766 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 767 exp.Where: lambda self: self._parse_where(), 768 exp.Window: lambda self: self._parse_named_window(), 769 exp.With: lambda self: self._parse_with(), 770 "JOIN_TYPE": lambda self: self._parse_join_parts(), 771 } 772 773 STATEMENT_PARSERS = { 774 TokenType.ALTER: lambda self: self._parse_alter(), 775 TokenType.BEGIN: lambda self: self._parse_transaction(), 776 TokenType.CACHE: lambda self: self._parse_cache(), 777 TokenType.COMMENT: lambda self: self._parse_comment(), 778 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 779 TokenType.COPY: lambda self: self._parse_copy(), 780 TokenType.CREATE: lambda self: self._parse_create(), 781 TokenType.DELETE: lambda self: self._parse_delete(), 782 TokenType.DESC: lambda self: self._parse_describe(), 783 TokenType.DESCRIBE: lambda self: self._parse_describe(), 784 TokenType.DROP: lambda self: self._parse_drop(), 785 TokenType.GRANT: lambda self: self._parse_grant(), 786 TokenType.INSERT: lambda self: self._parse_insert(), 787 TokenType.KILL: lambda self: self._parse_kill(), 788 TokenType.LOAD: lambda self: self._parse_load(), 789 TokenType.MERGE: lambda self: self._parse_merge(), 790 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 791 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 792 TokenType.REFRESH: lambda self: self._parse_refresh(), 793 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 794 TokenType.SET: lambda self: self._parse_set(), 795 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 796 TokenType.UNCACHE: lambda self: self._parse_uncache(), 797 
TokenType.UPDATE: lambda self: self._parse_update(), 798 TokenType.USE: lambda self: self.expression( 799 exp.Use, 800 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 801 this=self._parse_table(schema=False), 802 ), 803 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 804 } 805 806 UNARY_PARSERS = { 807 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 808 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 809 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 810 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 811 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 812 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 813 } 814 815 STRING_PARSERS = { 816 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 817 exp.RawString, this=token.text 818 ), 819 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 820 exp.National, this=token.text 821 ), 822 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 823 TokenType.STRING: lambda self, token: self.expression( 824 exp.Literal, this=token.text, is_string=True 825 ), 826 TokenType.UNICODE_STRING: lambda self, token: self.expression( 827 exp.UnicodeString, 828 this=token.text, 829 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 830 ), 831 } 832 833 NUMERIC_PARSERS = { 834 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 835 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 836 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 837 TokenType.NUMBER: lambda self, token: self.expression( 838 exp.Literal, this=token.text, is_string=False 839 ), 840 } 841 842 PRIMARY_PARSERS = { 
843 **STRING_PARSERS, 844 **NUMERIC_PARSERS, 845 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 846 TokenType.NULL: lambda self, _: self.expression(exp.Null), 847 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 848 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 849 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 850 TokenType.STAR: lambda self, _: self._parse_star_ops(), 851 } 852 853 PLACEHOLDER_PARSERS = { 854 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 855 TokenType.PARAMETER: lambda self: self._parse_parameter(), 856 TokenType.COLON: lambda self: ( 857 self.expression(exp.Placeholder, this=self._prev.text) 858 if self._match_set(self.ID_VAR_TOKENS) 859 else None 860 ), 861 } 862 863 RANGE_PARSERS = { 864 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 865 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 866 TokenType.GLOB: binary_range_parser(exp.Glob), 867 TokenType.ILIKE: binary_range_parser(exp.ILike), 868 TokenType.IN: lambda self, this: self._parse_in(this), 869 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 870 TokenType.IS: lambda self, this: self._parse_is(this), 871 TokenType.LIKE: binary_range_parser(exp.Like), 872 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 873 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 874 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 875 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 876 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 877 } 878 879 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 880 "ALLOWED_VALUES": lambda self: self.expression( 881 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 882 ), 883 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 884 "AUTO": lambda self: self._parse_auto_property(), 885 
"AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 886 "BACKUP": lambda self: self.expression( 887 exp.BackupProperty, this=self._parse_var(any_token=True) 888 ), 889 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 890 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 891 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 892 "CHECKSUM": lambda self: self._parse_checksum(), 893 "CLUSTER BY": lambda self: self._parse_cluster(), 894 "CLUSTERED": lambda self: self._parse_clustered_by(), 895 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 896 exp.CollateProperty, **kwargs 897 ), 898 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 899 "CONTAINS": lambda self: self._parse_contains_property(), 900 "COPY": lambda self: self._parse_copy_property(), 901 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 902 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 903 "DEFINER": lambda self: self._parse_definer(), 904 "DETERMINISTIC": lambda self: self.expression( 905 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 906 ), 907 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 908 "DUPLICATE": lambda self: self._parse_duplicate(), 909 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 910 "DISTKEY": lambda self: self._parse_distkey(), 911 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 912 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 913 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 914 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 915 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 916 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 917 "FORMAT": lambda self: 
self._parse_property_assignment(exp.FileFormatProperty), 918 "FREESPACE": lambda self: self._parse_freespace(), 919 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 920 "HEAP": lambda self: self.expression(exp.HeapProperty), 921 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 922 "IMMUTABLE": lambda self: self.expression( 923 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 924 ), 925 "INHERITS": lambda self: self.expression( 926 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 927 ), 928 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 929 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 930 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 931 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 932 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 933 "LIKE": lambda self: self._parse_create_like(), 934 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 935 "LOCK": lambda self: self._parse_locking(), 936 "LOCKING": lambda self: self._parse_locking(), 937 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 938 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 939 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 940 "MODIFIES": lambda self: self._parse_modifies_property(), 941 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 942 "NO": lambda self: self._parse_no_property(), 943 "ON": lambda self: self._parse_on_property(), 944 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 945 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 946 "PARTITION": lambda self: self._parse_partitioned_of(), 947 "PARTITION BY": lambda self: self._parse_partitioned_by(), 948 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 949 
"PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 950 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 951 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 952 "READS": lambda self: self._parse_reads_property(), 953 "REMOTE": lambda self: self._parse_remote_with_connection(), 954 "RETURNS": lambda self: self._parse_returns(), 955 "STRICT": lambda self: self.expression(exp.StrictProperty), 956 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 957 "ROW": lambda self: self._parse_row(), 958 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 959 "SAMPLE": lambda self: self.expression( 960 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 961 ), 962 "SECURE": lambda self: self.expression(exp.SecureProperty), 963 "SECURITY": lambda self: self._parse_security(), 964 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 965 "SETTINGS": lambda self: self._parse_settings_property(), 966 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 967 "SORTKEY": lambda self: self._parse_sortkey(), 968 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 969 "STABLE": lambda self: self.expression( 970 exp.StabilityProperty, this=exp.Literal.string("STABLE") 971 ), 972 "STORED": lambda self: self._parse_stored(), 973 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 974 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 975 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 976 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 977 "TO": lambda self: self._parse_to_table(), 978 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 979 "TRANSFORM": lambda self: self.expression( 980 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 981 ), 982 "TTL": lambda self: self._parse_ttl(), 983 "USING": lambda self: 
self._parse_property_assignment(exp.FileFormatProperty), 984 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 985 "VOLATILE": lambda self: self._parse_volatile_property(), 986 "WITH": lambda self: self._parse_with_property(), 987 } 988 989 CONSTRAINT_PARSERS = { 990 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 991 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 992 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 993 "CHARACTER SET": lambda self: self.expression( 994 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 995 ), 996 "CHECK": lambda self: self.expression( 997 exp.CheckColumnConstraint, 998 this=self._parse_wrapped(self._parse_assignment), 999 enforced=self._match_text_seq("ENFORCED"), 1000 ), 1001 "COLLATE": lambda self: self.expression( 1002 exp.CollateColumnConstraint, 1003 this=self._parse_identifier() or self._parse_column(), 1004 ), 1005 "COMMENT": lambda self: self.expression( 1006 exp.CommentColumnConstraint, this=self._parse_string() 1007 ), 1008 "COMPRESS": lambda self: self._parse_compress(), 1009 "CLUSTERED": lambda self: self.expression( 1010 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1011 ), 1012 "NONCLUSTERED": lambda self: self.expression( 1013 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1014 ), 1015 "DEFAULT": lambda self: self.expression( 1016 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1017 ), 1018 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1019 "EPHEMERAL": lambda self: self.expression( 1020 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1021 ), 1022 "EXCLUDE": lambda self: self.expression( 1023 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1024 ), 1025 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1026 "FORMAT": lambda self: self.expression( 1027 
exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1028 ), 1029 "GENERATED": lambda self: self._parse_generated_as_identity(), 1030 "IDENTITY": lambda self: self._parse_auto_increment(), 1031 "INLINE": lambda self: self._parse_inline(), 1032 "LIKE": lambda self: self._parse_create_like(), 1033 "NOT": lambda self: self._parse_not_constraint(), 1034 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1035 "ON": lambda self: ( 1036 self._match(TokenType.UPDATE) 1037 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1038 ) 1039 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1040 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1041 "PERIOD": lambda self: self._parse_period_for_system_time(), 1042 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1043 "REFERENCES": lambda self: self._parse_references(match=False), 1044 "TITLE": lambda self: self.expression( 1045 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1046 ), 1047 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1048 "UNIQUE": lambda self: self._parse_unique(), 1049 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1050 "WITH": lambda self: self.expression( 1051 exp.Properties, expressions=self._parse_wrapped_properties() 1052 ), 1053 } 1054 1055 ALTER_PARSERS = { 1056 "ADD": lambda self: self._parse_alter_table_add(), 1057 "AS": lambda self: self._parse_select(), 1058 "ALTER": lambda self: self._parse_alter_table_alter(), 1059 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1060 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1061 "DROP": lambda self: self._parse_alter_table_drop(), 1062 "RENAME": lambda self: self._parse_alter_table_rename(), 1063 "SET": lambda self: self._parse_alter_table_set(), 1064 "SWAP": lambda self: self.expression( 1065 
exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1066 ), 1067 } 1068 1069 ALTER_ALTER_PARSERS = { 1070 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1071 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1072 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1073 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1074 } 1075 1076 SCHEMA_UNNAMED_CONSTRAINTS = { 1077 "CHECK", 1078 "EXCLUDE", 1079 "FOREIGN KEY", 1080 "LIKE", 1081 "PERIOD", 1082 "PRIMARY KEY", 1083 "UNIQUE", 1084 } 1085 1086 NO_PAREN_FUNCTION_PARSERS = { 1087 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1088 "CASE": lambda self: self._parse_case(), 1089 "CONNECT_BY_ROOT": lambda self: self.expression( 1090 exp.ConnectByRoot, this=self._parse_column() 1091 ), 1092 "IF": lambda self: self._parse_if(), 1093 "NEXT": lambda self: self._parse_next_value_for(), 1094 } 1095 1096 INVALID_FUNC_NAME_TOKENS = { 1097 TokenType.IDENTIFIER, 1098 TokenType.STRING, 1099 } 1100 1101 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1102 1103 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1104 1105 FUNCTION_PARSERS = { 1106 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1107 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1108 "DECODE": lambda self: self._parse_decode(), 1109 "EXTRACT": lambda self: self._parse_extract(), 1110 "GAP_FILL": lambda self: self._parse_gap_fill(), 1111 "JSON_OBJECT": lambda self: self._parse_json_object(), 1112 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1113 "JSON_TABLE": lambda self: self._parse_json_table(), 1114 "MATCH": lambda self: self._parse_match_against(), 1115 "NORMALIZE": lambda self: self._parse_normalize(), 1116 "OPENJSON": lambda self: self._parse_open_json(), 1117 "OVERLAY": lambda self: self._parse_overlay(), 1118 "POSITION": lambda self: self._parse_position(), 1119 "PREDICT": lambda self: self._parse_predict(), 1120 
"SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1121 "STRING_AGG": lambda self: self._parse_string_agg(), 1122 "SUBSTRING": lambda self: self._parse_substring(), 1123 "TRIM": lambda self: self._parse_trim(), 1124 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1125 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1126 } 1127 1128 QUERY_MODIFIER_PARSERS = { 1129 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1130 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1131 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1132 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1133 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1134 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1135 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1136 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1137 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1138 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1139 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1140 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1141 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1142 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1143 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1144 TokenType.CLUSTER_BY: lambda self: ( 1145 "cluster", 1146 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1147 ), 1148 TokenType.DISTRIBUTE_BY: lambda self: ( 1149 "distribute", 1150 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1151 ), 1152 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1153 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1154 TokenType.START_WITH: 
lambda self: ("connect", self._parse_connect()), 1155 } 1156 1157 SET_PARSERS = { 1158 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1159 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1160 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1161 "TRANSACTION": lambda self: self._parse_set_transaction(), 1162 } 1163 1164 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1165 1166 TYPE_LITERAL_PARSERS = { 1167 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1168 } 1169 1170 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1171 1172 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1173 1174 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1175 1176 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1177 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1178 "ISOLATION": ( 1179 ("LEVEL", "REPEATABLE", "READ"), 1180 ("LEVEL", "READ", "COMMITTED"), 1181 ("LEVEL", "READ", "UNCOMITTED"), 1182 ("LEVEL", "SERIALIZABLE"), 1183 ), 1184 "READ": ("WRITE", "ONLY"), 1185 } 1186 1187 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1188 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1189 ) 1190 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1191 1192 CREATE_SEQUENCE: OPTIONS_TYPE = { 1193 "SCALE": ("EXTEND", "NOEXTEND"), 1194 "SHARD": ("EXTEND", "NOEXTEND"), 1195 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1196 **dict.fromkeys( 1197 ( 1198 "SESSION", 1199 "GLOBAL", 1200 "KEEP", 1201 "NOKEEP", 1202 "ORDER", 1203 "NOORDER", 1204 "NOCACHE", 1205 "CYCLE", 1206 "NOCYCLE", 1207 "NOMINVALUE", 1208 "NOMAXVALUE", 1209 "NOSCALE", 1210 "NOSHARD", 1211 ), 1212 tuple(), 1213 ), 1214 } 1215 1216 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1217 1218 USABLES: OPTIONS_TYPE = dict.fromkeys( 1219 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 
1220 ) 1221 1222 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1223 1224 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1225 "TYPE": ("EVOLUTION",), 1226 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1227 } 1228 1229 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1230 1231 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1232 1233 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1234 "NOT": ("ENFORCED",), 1235 "MATCH": ( 1236 "FULL", 1237 "PARTIAL", 1238 "SIMPLE", 1239 ), 1240 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1241 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1242 } 1243 1244 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1245 1246 CLONE_KEYWORDS = {"CLONE", "COPY"} 1247 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1248 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1249 1250 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1251 1252 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1253 1254 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1255 1256 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1257 1258 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1259 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1260 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1261 1262 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1263 1264 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1265 1266 ADD_CONSTRAINT_TOKENS = { 1267 TokenType.CONSTRAINT, 1268 TokenType.FOREIGN_KEY, 1269 TokenType.INDEX, 1270 TokenType.KEY, 1271 TokenType.PRIMARY_KEY, 1272 TokenType.UNIQUE, 1273 } 1274 1275 DISTINCT_TOKENS = {TokenType.DISTINCT} 1276 1277 NULL_TOKENS = {TokenType.NULL} 1278 1279 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1280 1281 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 
    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # ODBC-style datetime literal prefixes, e.g. {d '2024-01-01'}
    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    OPERATION_MODIFIERS: t.Set[str] = set()

    # Whether :: casts produce exp.Cast (strict) instead of exp.TryCast
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How parse errors are surfaced (defaults to ErrorLevel.IMMEDIATE).
            error_message_context: The number of characters of SQL context shown in error messages.
            max_errors: Maximum number of error messages concatenated when raising.
            dialect: The dialect (name, instance, or class) used to parse; resolved via
                Dialect.get_or_raise.
        """
        # Imported locally — presumably to avoid a circular import at module load time
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clear all mutable parsing state so this instance can parse a new token stream."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If the token list could not be parsed into any of the given types;
                the last underlying ParseError is chained as the cause.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this failure belongs to before moving on
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Splits the token stream on semicolons and applies `parse_method` to each
        statement's tokens, collecting errors according to the error level.
        """
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    # Keep a comment-bearing semicolon as its own chunk so the comments
                    # survive (STATEMENT_PARSERS maps SEMICOLON to exp.Semicolon)
                    chunks.append([token])

                # Start a new statement chunk unless this is the trailing semicolon
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # _advance() moves from -1 onto the first token of the chunk
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
1502 Appends an error in the list of recorded errors or raises it, depending on the chosen 1503 error level setting. 1504 """ 1505 token = token or self._curr or self._prev or Token.string("") 1506 start = token.start 1507 end = token.end + 1 1508 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1509 highlight = self.sql[start:end] 1510 end_context = self.sql[end : end + self.error_message_context] 1511 1512 error = ParseError.new( 1513 f"{message}. Line {token.line}, Col: {token.col}.\n" 1514 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1515 description=message, 1516 line=token.line, 1517 col=token.col, 1518 start_context=start_context, 1519 highlight=highlight, 1520 end_context=end_context, 1521 ) 1522 1523 if self.error_level == ErrorLevel.IMMEDIATE: 1524 raise error 1525 1526 self.errors.append(error) 1527 1528 def expression( 1529 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1530 ) -> E: 1531 """ 1532 Creates a new, validated Expression. 1533 1534 Args: 1535 exp_class: The expression class to instantiate. 1536 comments: An optional list of comments to attach to the expression. 1537 kwargs: The arguments to set for the expression along with their respective values. 1538 1539 Returns: 1540 The target expression. 1541 """ 1542 instance = exp_class(**kwargs) 1543 instance.add_comments(comments) if comments else self._add_comments(instance) 1544 return self.validate_expression(instance) 1545 1546 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1547 if expression and self._prev_comments: 1548 expression.add_comments(self._prev_comments) 1549 self._prev_comments = None 1550 1551 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1552 """ 1553 Validates an Expression, making sure that all its mandatory arguments are set. 1554 1555 Args: 1556 expression: The expression to validate. 
1557 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1558 1559 Returns: 1560 The validated expression. 1561 """ 1562 if self.error_level != ErrorLevel.IGNORE: 1563 for error_message in expression.error_messages(args): 1564 self.raise_error(error_message) 1565 1566 return expression 1567 1568 def _find_sql(self, start: Token, end: Token) -> str: 1569 return self.sql[start.start : end.end + 1] 1570 1571 def _is_connected(self) -> bool: 1572 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1573 1574 def _advance(self, times: int = 1) -> None: 1575 self._index += times 1576 self._curr = seq_get(self._tokens, self._index) 1577 self._next = seq_get(self._tokens, self._index + 1) 1578 1579 if self._index > 0: 1580 self._prev = self._tokens[self._index - 1] 1581 self._prev_comments = self._prev.comments 1582 else: 1583 self._prev = None 1584 self._prev_comments = None 1585 1586 def _retreat(self, index: int) -> None: 1587 if index != self._index: 1588 self._advance(index - self._index) 1589 1590 def _warn_unsupported(self) -> None: 1591 if len(self._tokens) <= 1: 1592 return 1593 1594 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1595 # interested in emitting a warning for the one being currently processed. 1596 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1597 1598 logger.warning( 1599 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1600 ) 1601 1602 def _parse_command(self) -> exp.Command: 1603 self._warn_unsupported() 1604 return self.expression( 1605 exp.Command, 1606 comments=self._prev_comments, 1607 this=self._prev.text.upper(), 1608 expression=self._parse_string(), 1609 ) 1610 1611 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1612 """ 1613 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1614 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1615 solve this by setting & resetting the parser state accordingly 1616 """ 1617 index = self._index 1618 error_level = self.error_level 1619 1620 self.error_level = ErrorLevel.IMMEDIATE 1621 try: 1622 this = parse_method() 1623 except ParseError: 1624 this = None 1625 finally: 1626 if not this or retreat: 1627 self._retreat(index) 1628 self.error_level = error_level 1629 1630 return this 1631 1632 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1633 start = self._prev 1634 exists = self._parse_exists() if allow_exists else None 1635 1636 self._match(TokenType.ON) 1637 1638 materialized = self._match_text_seq("MATERIALIZED") 1639 kind = self._match_set(self.CREATABLES) and self._prev 1640 if not kind: 1641 return self._parse_as_command(start) 1642 1643 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1644 this = self._parse_user_defined_function(kind=kind.token_type) 1645 elif kind.token_type == TokenType.TABLE: 1646 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1647 elif kind.token_type == TokenType.COLUMN: 1648 this = self._parse_column() 1649 else: 1650 this = self._parse_id_var() 1651 1652 self._match(TokenType.IS) 1653 1654 return self.expression( 1655 exp.Comment, 1656 this=this, 1657 kind=kind.text, 1658 expression=self._parse_string(), 1659 exists=exists, 1660 materialized=materialized, 1661 ) 1662 1663 def _parse_to_table( 1664 self, 1665 ) -> exp.ToTableProperty: 1666 table = self._parse_table_parts(schema=True) 1667 return self.expression(exp.ToTableProperty, this=table) 1668 1669 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1670 def _parse_ttl(self) -> exp.Expression: 1671 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1672 this = self._parse_bitwise() 1673 1674 if self._match_text_seq("DELETE"): 1675 return 
self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1676 if self._match_text_seq("RECOMPRESS"): 1677 return self.expression( 1678 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1679 ) 1680 if self._match_text_seq("TO", "DISK"): 1681 return self.expression( 1682 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1683 ) 1684 if self._match_text_seq("TO", "VOLUME"): 1685 return self.expression( 1686 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1687 ) 1688 1689 return this 1690 1691 expressions = self._parse_csv(_parse_ttl_action) 1692 where = self._parse_where() 1693 group = self._parse_group() 1694 1695 aggregates = None 1696 if group and self._match(TokenType.SET): 1697 aggregates = self._parse_csv(self._parse_set_item) 1698 1699 return self.expression( 1700 exp.MergeTreeTTL, 1701 expressions=expressions, 1702 where=where, 1703 group=group, 1704 aggregates=aggregates, 1705 ) 1706 1707 def _parse_statement(self) -> t.Optional[exp.Expression]: 1708 if self._curr is None: 1709 return None 1710 1711 if self._match_set(self.STATEMENT_PARSERS): 1712 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1713 1714 if self._match_set(self.dialect.tokenizer.COMMANDS): 1715 return self._parse_command() 1716 1717 expression = self._parse_expression() 1718 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1719 return self._parse_query_modifiers(expression) 1720 1721 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1722 start = self._prev 1723 temporary = self._match(TokenType.TEMPORARY) 1724 materialized = self._match_text_seq("MATERIALIZED") 1725 1726 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1727 if not kind: 1728 return self._parse_as_command(start) 1729 1730 concurrently = self._match_text_seq("CONCURRENTLY") 1731 if_exists = exists or self._parse_exists() 1732 table = self._parse_table_parts( 1733 schema=True, 
is_db_reference=self._prev.token_type == TokenType.SCHEMA 1734 ) 1735 1736 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1737 1738 if self._match(TokenType.L_PAREN, advance=False): 1739 expressions = self._parse_wrapped_csv(self._parse_types) 1740 else: 1741 expressions = None 1742 1743 return self.expression( 1744 exp.Drop, 1745 comments=start.comments, 1746 exists=if_exists, 1747 this=table, 1748 expressions=expressions, 1749 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1750 temporary=temporary, 1751 materialized=materialized, 1752 cascade=self._match_text_seq("CASCADE"), 1753 constraints=self._match_text_seq("CONSTRAINTS"), 1754 purge=self._match_text_seq("PURGE"), 1755 cluster=cluster, 1756 concurrently=concurrently, 1757 ) 1758 1759 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1760 return ( 1761 self._match_text_seq("IF") 1762 and (not not_ or self._match(TokenType.NOT)) 1763 and self._match(TokenType.EXISTS) 1764 ) 1765 1766 def _parse_create(self) -> exp.Create | exp.Command: 1767 # Note: this can't be None because we've matched a statement parser 1768 start = self._prev 1769 comments = self._prev_comments 1770 1771 replace = ( 1772 start.token_type == TokenType.REPLACE 1773 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1774 or self._match_pair(TokenType.OR, TokenType.ALTER) 1775 ) 1776 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1777 1778 unique = self._match(TokenType.UNIQUE) 1779 1780 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1781 clustered = True 1782 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1783 "COLUMNSTORE" 1784 ): 1785 clustered = False 1786 else: 1787 clustered = None 1788 1789 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1790 self._advance() 1791 1792 properties = None 1793 create_token = self._match_set(self.CREATABLES) and self._prev 1794 1795 if not create_token: 1796 # 
exp.Properties.Location.POST_CREATE 1797 properties = self._parse_properties() 1798 create_token = self._match_set(self.CREATABLES) and self._prev 1799 1800 if not properties or not create_token: 1801 return self._parse_as_command(start) 1802 1803 concurrently = self._match_text_seq("CONCURRENTLY") 1804 exists = self._parse_exists(not_=True) 1805 this = None 1806 expression: t.Optional[exp.Expression] = None 1807 indexes = None 1808 no_schema_binding = None 1809 begin = None 1810 end = None 1811 clone = None 1812 1813 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1814 nonlocal properties 1815 if properties and temp_props: 1816 properties.expressions.extend(temp_props.expressions) 1817 elif temp_props: 1818 properties = temp_props 1819 1820 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1821 this = self._parse_user_defined_function(kind=create_token.token_type) 1822 1823 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1824 extend_props(self._parse_properties()) 1825 1826 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1827 extend_props(self._parse_properties()) 1828 1829 if not expression: 1830 if self._match(TokenType.COMMAND): 1831 expression = self._parse_as_command(self._prev) 1832 else: 1833 begin = self._match(TokenType.BEGIN) 1834 return_ = self._match_text_seq("RETURN") 1835 1836 if self._match(TokenType.STRING, advance=False): 1837 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1838 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1839 expression = self._parse_string() 1840 extend_props(self._parse_properties()) 1841 else: 1842 expression = self._parse_statement() 1843 1844 end = self._match_text_seq("END") 1845 1846 if return_: 1847 expression = self.expression(exp.Return, this=expression) 1848 elif create_token.token_type == TokenType.INDEX: 1849 # 
Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1850 if not self._match(TokenType.ON): 1851 index = self._parse_id_var() 1852 anonymous = False 1853 else: 1854 index = None 1855 anonymous = True 1856 1857 this = self._parse_index(index=index, anonymous=anonymous) 1858 elif create_token.token_type in self.DB_CREATABLES: 1859 table_parts = self._parse_table_parts( 1860 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1861 ) 1862 1863 # exp.Properties.Location.POST_NAME 1864 self._match(TokenType.COMMA) 1865 extend_props(self._parse_properties(before=True)) 1866 1867 this = self._parse_schema(this=table_parts) 1868 1869 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1870 extend_props(self._parse_properties()) 1871 1872 self._match(TokenType.ALIAS) 1873 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1874 # exp.Properties.Location.POST_ALIAS 1875 extend_props(self._parse_properties()) 1876 1877 if create_token.token_type == TokenType.SEQUENCE: 1878 expression = self._parse_types() 1879 extend_props(self._parse_properties()) 1880 else: 1881 expression = self._parse_ddl_select() 1882 1883 if create_token.token_type == TokenType.TABLE: 1884 # exp.Properties.Location.POST_EXPRESSION 1885 extend_props(self._parse_properties()) 1886 1887 indexes = [] 1888 while True: 1889 index = self._parse_index() 1890 1891 # exp.Properties.Location.POST_INDEX 1892 extend_props(self._parse_properties()) 1893 if not index: 1894 break 1895 else: 1896 self._match(TokenType.COMMA) 1897 indexes.append(index) 1898 elif create_token.token_type == TokenType.VIEW: 1899 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1900 no_schema_binding = True 1901 1902 shallow = self._match_text_seq("SHALLOW") 1903 1904 if self._match_texts(self.CLONE_KEYWORDS): 1905 copy = self._prev.text.lower() == "copy" 1906 clone = self.expression( 1907 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1908 ) 1909 1910 
if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1911 return self._parse_as_command(start) 1912 1913 create_kind_text = create_token.text.upper() 1914 return self.expression( 1915 exp.Create, 1916 comments=comments, 1917 this=this, 1918 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1919 replace=replace, 1920 refresh=refresh, 1921 unique=unique, 1922 expression=expression, 1923 exists=exists, 1924 properties=properties, 1925 indexes=indexes, 1926 no_schema_binding=no_schema_binding, 1927 begin=begin, 1928 end=end, 1929 clone=clone, 1930 concurrently=concurrently, 1931 clustered=clustered, 1932 ) 1933 1934 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1935 seq = exp.SequenceProperties() 1936 1937 options = [] 1938 index = self._index 1939 1940 while self._curr: 1941 self._match(TokenType.COMMA) 1942 if self._match_text_seq("INCREMENT"): 1943 self._match_text_seq("BY") 1944 self._match_text_seq("=") 1945 seq.set("increment", self._parse_term()) 1946 elif self._match_text_seq("MINVALUE"): 1947 seq.set("minvalue", self._parse_term()) 1948 elif self._match_text_seq("MAXVALUE"): 1949 seq.set("maxvalue", self._parse_term()) 1950 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1951 self._match_text_seq("=") 1952 seq.set("start", self._parse_term()) 1953 elif self._match_text_seq("CACHE"): 1954 # T-SQL allows empty CACHE which is initialized dynamically 1955 seq.set("cache", self._parse_number() or True) 1956 elif self._match_text_seq("OWNED", "BY"): 1957 # "OWNED BY NONE" is the default 1958 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1959 else: 1960 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1961 if opt: 1962 options.append(opt) 1963 else: 1964 break 1965 1966 seq.set("options", options if options else None) 1967 return None if self._index == index else seq 1968 1969 
def _parse_property_before(self) -> t.Optional[exp.Expression]: 1970 # only used for teradata currently 1971 self._match(TokenType.COMMA) 1972 1973 kwargs = { 1974 "no": self._match_text_seq("NO"), 1975 "dual": self._match_text_seq("DUAL"), 1976 "before": self._match_text_seq("BEFORE"), 1977 "default": self._match_text_seq("DEFAULT"), 1978 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1979 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1980 "after": self._match_text_seq("AFTER"), 1981 "minimum": self._match_texts(("MIN", "MINIMUM")), 1982 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1983 } 1984 1985 if self._match_texts(self.PROPERTY_PARSERS): 1986 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1987 try: 1988 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1989 except TypeError: 1990 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1991 1992 return None 1993 1994 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1995 return self._parse_wrapped_csv(self._parse_property) 1996 1997 def _parse_property(self) -> t.Optional[exp.Expression]: 1998 if self._match_texts(self.PROPERTY_PARSERS): 1999 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2000 2001 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2002 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2003 2004 if self._match_text_seq("COMPOUND", "SORTKEY"): 2005 return self._parse_sortkey(compound=True) 2006 2007 if self._match_text_seq("SQL", "SECURITY"): 2008 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2009 2010 index = self._index 2011 key = self._parse_column() 2012 2013 if not self._match(TokenType.EQ): 2014 self._retreat(index) 2015 return self._parse_sequence_properties() 2016 2017 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2018 if isinstance(key, exp.Column): 2019 key 
= key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2020 2021 value = self._parse_bitwise() or self._parse_var(any_token=True) 2022 2023 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2024 if isinstance(value, exp.Column): 2025 value = exp.var(value.name) 2026 2027 return self.expression(exp.Property, this=key, value=value) 2028 2029 def _parse_stored(self) -> exp.FileFormatProperty: 2030 self._match(TokenType.ALIAS) 2031 2032 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2033 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2034 2035 return self.expression( 2036 exp.FileFormatProperty, 2037 this=( 2038 self.expression( 2039 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2040 ) 2041 if input_format or output_format 2042 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2043 ), 2044 ) 2045 2046 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2047 field = self._parse_field() 2048 if isinstance(field, exp.Identifier) and not field.quoted: 2049 field = exp.var(field) 2050 2051 return field 2052 2053 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2054 self._match(TokenType.EQ) 2055 self._match(TokenType.ALIAS) 2056 2057 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2058 2059 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2060 properties = [] 2061 while True: 2062 if before: 2063 prop = self._parse_property_before() 2064 else: 2065 prop = self._parse_property() 2066 if not prop: 2067 break 2068 for p in ensure_list(prop): 2069 properties.append(p) 2070 2071 if properties: 2072 return self.expression(exp.Properties, expressions=properties) 2073 2074 return None 2075 2076 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2077 return self.expression( 
2078 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2079 ) 2080 2081 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2082 if self._match_texts(("DEFINER", "INVOKER")): 2083 security_specifier = self._prev.text.upper() 2084 return self.expression(exp.SecurityProperty, this=security_specifier) 2085 return None 2086 2087 def _parse_settings_property(self) -> exp.SettingsProperty: 2088 return self.expression( 2089 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2090 ) 2091 2092 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2093 if self._index >= 2: 2094 pre_volatile_token = self._tokens[self._index - 2] 2095 else: 2096 pre_volatile_token = None 2097 2098 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2099 return exp.VolatileProperty() 2100 2101 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2102 2103 def _parse_retention_period(self) -> exp.Var: 2104 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2105 number = self._parse_number() 2106 number_str = f"{number} " if number else "" 2107 unit = self._parse_var(any_token=True) 2108 return exp.var(f"{number_str}{unit}") 2109 2110 def _parse_system_versioning_property( 2111 self, with_: bool = False 2112 ) -> exp.WithSystemVersioningProperty: 2113 self._match(TokenType.EQ) 2114 prop = self.expression( 2115 exp.WithSystemVersioningProperty, 2116 **{ # type: ignore 2117 "on": True, 2118 "with": with_, 2119 }, 2120 ) 2121 2122 if self._match_text_seq("OFF"): 2123 prop.set("on", False) 2124 return prop 2125 2126 self._match(TokenType.ON) 2127 if self._match(TokenType.L_PAREN): 2128 while self._curr and not self._match(TokenType.R_PAREN): 2129 if self._match_text_seq("HISTORY_TABLE", "="): 2130 prop.set("this", self._parse_table_parts()) 2131 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2132 
prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2133 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2134 prop.set("retention_period", self._parse_retention_period()) 2135 2136 self._match(TokenType.COMMA) 2137 2138 return prop 2139 2140 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2141 self._match(TokenType.EQ) 2142 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2143 prop = self.expression(exp.DataDeletionProperty, on=on) 2144 2145 if self._match(TokenType.L_PAREN): 2146 while self._curr and not self._match(TokenType.R_PAREN): 2147 if self._match_text_seq("FILTER_COLUMN", "="): 2148 prop.set("filter_column", self._parse_column()) 2149 elif self._match_text_seq("RETENTION_PERIOD", "="): 2150 prop.set("retention_period", self._parse_retention_period()) 2151 2152 self._match(TokenType.COMMA) 2153 2154 return prop 2155 2156 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2157 kind = "HASH" 2158 expressions: t.Optional[t.List[exp.Expression]] = None 2159 if self._match_text_seq("BY", "HASH"): 2160 expressions = self._parse_wrapped_csv(self._parse_id_var) 2161 elif self._match_text_seq("BY", "RANDOM"): 2162 kind = "RANDOM" 2163 2164 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2165 buckets: t.Optional[exp.Expression] = None 2166 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2167 buckets = self._parse_number() 2168 2169 return self.expression( 2170 exp.DistributedByProperty, 2171 expressions=expressions, 2172 kind=kind, 2173 buckets=buckets, 2174 order=self._parse_order(), 2175 ) 2176 2177 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2178 self._match_text_seq("KEY") 2179 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2180 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2181 2182 def _parse_with_property(self) -> t.Optional[exp.Expression] | 
t.List[exp.Expression]: 2183 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2184 prop = self._parse_system_versioning_property(with_=True) 2185 self._match_r_paren() 2186 return prop 2187 2188 if self._match(TokenType.L_PAREN, advance=False): 2189 return self._parse_wrapped_properties() 2190 2191 if self._match_text_seq("JOURNAL"): 2192 return self._parse_withjournaltable() 2193 2194 if self._match_texts(self.VIEW_ATTRIBUTES): 2195 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2196 2197 if self._match_text_seq("DATA"): 2198 return self._parse_withdata(no=False) 2199 elif self._match_text_seq("NO", "DATA"): 2200 return self._parse_withdata(no=True) 2201 2202 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2203 return self._parse_serde_properties(with_=True) 2204 2205 if self._match(TokenType.SCHEMA): 2206 return self.expression( 2207 exp.WithSchemaBindingProperty, 2208 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2209 ) 2210 2211 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2212 return self.expression( 2213 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2214 ) 2215 2216 if not self._next: 2217 return None 2218 2219 return self._parse_withisolatedloading() 2220 2221 def _parse_procedure_option(self) -> exp.Expression | None: 2222 if self._match_text_seq("EXECUTE", "AS"): 2223 return self.expression( 2224 exp.ExecuteAsProperty, 2225 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2226 or self._parse_string(), 2227 ) 2228 2229 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2230 2231 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2232 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2233 self._match(TokenType.EQ) 2234 2235 user = self._parse_id_var() 2236 self._match(TokenType.PARAMETER) 2237 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2238 2239 
if not user or not host: 2240 return None 2241 2242 return exp.DefinerProperty(this=f"{user}@{host}") 2243 2244 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2245 self._match(TokenType.TABLE) 2246 self._match(TokenType.EQ) 2247 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2248 2249 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2250 return self.expression(exp.LogProperty, no=no) 2251 2252 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2253 return self.expression(exp.JournalProperty, **kwargs) 2254 2255 def _parse_checksum(self) -> exp.ChecksumProperty: 2256 self._match(TokenType.EQ) 2257 2258 on = None 2259 if self._match(TokenType.ON): 2260 on = True 2261 elif self._match_text_seq("OFF"): 2262 on = False 2263 2264 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2265 2266 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2267 return self.expression( 2268 exp.Cluster, 2269 expressions=( 2270 self._parse_wrapped_csv(self._parse_ordered) 2271 if wrapped 2272 else self._parse_csv(self._parse_ordered) 2273 ), 2274 ) 2275 2276 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2277 self._match_text_seq("BY") 2278 2279 self._match_l_paren() 2280 expressions = self._parse_csv(self._parse_column) 2281 self._match_r_paren() 2282 2283 if self._match_text_seq("SORTED", "BY"): 2284 self._match_l_paren() 2285 sorted_by = self._parse_csv(self._parse_ordered) 2286 self._match_r_paren() 2287 else: 2288 sorted_by = None 2289 2290 self._match(TokenType.INTO) 2291 buckets = self._parse_number() 2292 self._match_text_seq("BUCKETS") 2293 2294 return self.expression( 2295 exp.ClusteredByProperty, 2296 expressions=expressions, 2297 sorted_by=sorted_by, 2298 buckets=buckets, 2299 ) 2300 2301 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2302 if not self._match_text_seq("GRANTS"): 2303 self._retreat(self._index - 1) 
2304 return None 2305 2306 return self.expression(exp.CopyGrantsProperty) 2307 2308 def _parse_freespace(self) -> exp.FreespaceProperty: 2309 self._match(TokenType.EQ) 2310 return self.expression( 2311 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2312 ) 2313 2314 def _parse_mergeblockratio( 2315 self, no: bool = False, default: bool = False 2316 ) -> exp.MergeBlockRatioProperty: 2317 if self._match(TokenType.EQ): 2318 return self.expression( 2319 exp.MergeBlockRatioProperty, 2320 this=self._parse_number(), 2321 percent=self._match(TokenType.PERCENT), 2322 ) 2323 2324 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2325 2326 def _parse_datablocksize( 2327 self, 2328 default: t.Optional[bool] = None, 2329 minimum: t.Optional[bool] = None, 2330 maximum: t.Optional[bool] = None, 2331 ) -> exp.DataBlocksizeProperty: 2332 self._match(TokenType.EQ) 2333 size = self._parse_number() 2334 2335 units = None 2336 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2337 units = self._prev.text 2338 2339 return self.expression( 2340 exp.DataBlocksizeProperty, 2341 size=size, 2342 units=units, 2343 default=default, 2344 minimum=minimum, 2345 maximum=maximum, 2346 ) 2347 2348 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2349 self._match(TokenType.EQ) 2350 always = self._match_text_seq("ALWAYS") 2351 manual = self._match_text_seq("MANUAL") 2352 never = self._match_text_seq("NEVER") 2353 default = self._match_text_seq("DEFAULT") 2354 2355 autotemp = None 2356 if self._match_text_seq("AUTOTEMP"): 2357 autotemp = self._parse_schema() 2358 2359 return self.expression( 2360 exp.BlockCompressionProperty, 2361 always=always, 2362 manual=manual, 2363 never=never, 2364 default=default, 2365 autotemp=autotemp, 2366 ) 2367 2368 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2369 index = self._index 2370 no = self._match_text_seq("NO") 2371 concurrent = 
self._match_text_seq("CONCURRENT") 2372 2373 if not self._match_text_seq("ISOLATED", "LOADING"): 2374 self._retreat(index) 2375 return None 2376 2377 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2378 return self.expression( 2379 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2380 ) 2381 2382 def _parse_locking(self) -> exp.LockingProperty: 2383 if self._match(TokenType.TABLE): 2384 kind = "TABLE" 2385 elif self._match(TokenType.VIEW): 2386 kind = "VIEW" 2387 elif self._match(TokenType.ROW): 2388 kind = "ROW" 2389 elif self._match_text_seq("DATABASE"): 2390 kind = "DATABASE" 2391 else: 2392 kind = None 2393 2394 if kind in ("DATABASE", "TABLE", "VIEW"): 2395 this = self._parse_table_parts() 2396 else: 2397 this = None 2398 2399 if self._match(TokenType.FOR): 2400 for_or_in = "FOR" 2401 elif self._match(TokenType.IN): 2402 for_or_in = "IN" 2403 else: 2404 for_or_in = None 2405 2406 if self._match_text_seq("ACCESS"): 2407 lock_type = "ACCESS" 2408 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2409 lock_type = "EXCLUSIVE" 2410 elif self._match_text_seq("SHARE"): 2411 lock_type = "SHARE" 2412 elif self._match_text_seq("READ"): 2413 lock_type = "READ" 2414 elif self._match_text_seq("WRITE"): 2415 lock_type = "WRITE" 2416 elif self._match_text_seq("CHECKSUM"): 2417 lock_type = "CHECKSUM" 2418 else: 2419 lock_type = None 2420 2421 override = self._match_text_seq("OVERRIDE") 2422 2423 return self.expression( 2424 exp.LockingProperty, 2425 this=this, 2426 kind=kind, 2427 for_or_in=for_or_in, 2428 lock_type=lock_type, 2429 override=override, 2430 ) 2431 2432 def _parse_partition_by(self) -> t.List[exp.Expression]: 2433 if self._match(TokenType.PARTITION_BY): 2434 return self._parse_csv(self._parse_assignment) 2435 return [] 2436 2437 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2438 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2439 if 
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse `PARTITION OF <table> (DEFAULT | FOR VALUES <bound spec>)`; backtracks if OF is absent."""
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse `PARTITIONED BY [=] <schema or field>` into a PartitionedByProperty."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse `[AND [NO] STATISTICS]` after WITH [NO] DATA."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL keyword after CONTAINS, yielding `CONTAINS SQL`."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse `SQL DATA` after MODIFIES, yielding `MODIFIES SQL DATA`."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows NO: `PRIMARY INDEX` or `SQL`."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows ON: COMMIT PRESERVE/DELETE ROWS, else a generic ON <schema> property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse `SQL DATA` after READS, yielding `READS SQL DATA`."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse `DISTKEY (<id>)` into a DistKeyProperty."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse `LIKE <table> [INCLUDING|EXCLUDING <opt>]...` into a LikeProperty."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse `SORTKEY (<ids>)`; `compound` marks COMPOUND SORTKEY."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse `[=] <charset name>` into a CharacterSetProperty."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse `WITH CONNECTION <table parts>` for a remote model property."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: TABLE<...>, TABLE (<schema>), NULL ON NULL INPUT, or a plain type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE< col type, ... > struct-style schema.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement: optional creatable kind, style, target table, properties, partition."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # A dot right after the "style" word means it was actually the first
            # part of a qualified table name — undo and reparse it as the table.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse a multi-table INSERT (INSERT FIRST/ALL ... [WHEN cond THEN] INTO ... SELECT)."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # Each branch is `[WHEN <cond> THEN] [ELSE] INTO <table> [VALUES ...]`.
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement, dispatching to multi-table inserts on FIRST/ALL."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT [OVERWRITE] [LOCAL] DIRECTORY '<path>' [row format].
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE, dialect-specific alternatives.
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse `KILL [CONNECTION | QUERY] <id>`."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse `ON CONFLICT ...` (postgres-style) or `ON DUPLICATE KEY ...` (mysql-style)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE SET a = ..., b = ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse an optional `RETURNING <exprs> [INTO <target>]` clause."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a row format after ROW was consumed; requires the FORMAT keyword."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse `[WITH] SERDEPROPERTIES (...)`; backtracks entirely if the keyword is absent."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )
    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse `ROW FORMAT SERDE '<class>'` or `ROW FORMAT DELIMITED ...` (Hive-style)."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        # Each clause is optional and contributes one kwarg to the property.
        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse `LOAD DATA [LOCAL] INPATH ... INTO TABLE ...`; falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse `UNCACHE TABLE [IF EXISTS] <table>`."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse `CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]`."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse an optional `PARTITION (<assignments>)` clause."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse one VALUES row, parenthesized or a single bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTEs, SELECT, parenthesized subqueries, VALUES, etc."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # A following DOT means "all"/"distinct" was really an identifier prefix.
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following
                if table and isinstance(this, exp.Values) and this.alias:
                    alias = this.args["alias"].pop()
                    this = exp.Table(this=this, alias=alias)

            this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self.expression(exp.Stream, this=self._parse_function())
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            expressions.append(self._parse_cte())
            if last_comments:
                expressions[-1].add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: `<alias> AS [NOT MATERIALIZED | MATERIALIZED] (<statement>)`."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse `[AS] <alias> [(<columns>)]`; returns None if neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery with optional pivots, alias and table sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    # Rewrites bare table references that actually name columns of an earlier
    # FROM/JOIN source into explicit UNNEST(...) joins.
    def _implicit_unnests_to_explicit(self, this: E) -> E:
        # Imported locally; NOTE(review): presumably to avoid an import cycle — confirm.
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        # `refs` accumulates names of sources seen so far; a join target whose first
        # part matches a known source is treated as an implicit unnest of a column.
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, WHERE/GROUP/ORDER/LIMIT, ...) to a query."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # Fold a LIMIT-embedded offset (and LIMIT BY exprs) into
                            # a proper Offset node on the query.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment `/*+ ... */` into a Hint node."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse `INTO [TEMPORARY | UNLOGGED] [TABLE] <table>`."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `skip_from_token` when FROM was already consumed."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES item: `[FINAL | RUNNING] <expression>`."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE clause (partition, order, measures, rows/skip modes, pattern, define)."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan tokens until the pattern's parentheses balance out; the raw
            # token span is kept verbatim as the pattern text.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
3338 after=after, 3339 pattern=pattern, 3340 define=define, 3341 alias=self._parse_table_alias(), 3342 ) 3343 3344 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3345 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3346 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3347 cross_apply = False 3348 3349 if cross_apply is not None: 3350 this = self._parse_select(table=True) 3351 view = None 3352 outer = None 3353 elif self._match(TokenType.LATERAL): 3354 this = self._parse_select(table=True) 3355 view = self._match(TokenType.VIEW) 3356 outer = self._match(TokenType.OUTER) 3357 else: 3358 return None 3359 3360 if not this: 3361 this = ( 3362 self._parse_unnest() 3363 or self._parse_function() 3364 or self._parse_id_var(any_token=False) 3365 ) 3366 3367 while self._match(TokenType.DOT): 3368 this = exp.Dot( 3369 this=this, 3370 expression=self._parse_function() or self._parse_id_var(any_token=False), 3371 ) 3372 3373 if view: 3374 table = self._parse_id_var(any_token=False) 3375 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3376 table_alias: t.Optional[exp.TableAlias] = self.expression( 3377 exp.TableAlias, this=table, columns=columns 3378 ) 3379 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3380 # We move the alias from the lateral's child node to the lateral itself 3381 table_alias = this.args["alias"].pop() 3382 else: 3383 table_alias = self._parse_table_alias() 3384 3385 return self.expression( 3386 exp.Lateral, 3387 this=this, 3388 view=view, 3389 outer=outer, 3390 alias=table_alias, 3391 cross_apply=cross_apply, 3392 ) 3393 3394 def _parse_join_parts( 3395 self, 3396 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3397 return ( 3398 self._match_set(self.JOIN_METHODS) and self._prev, 3399 self._match_set(self.JOIN_SIDES) and self._prev, 3400 self._match_set(self.JOIN_KINDS) and self._prev, 3401 ) 3402 3403 def 
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause (including comma joins and APPLY forms).

        Returns an `exp.Join`, or None when the cursor is not at a join. When
        `skip_join_token` is True, the JOIN keyword itself is assumed to have
        been consumed (or to be absent) by the caller.
        """
        # A bare comma acts as an implicit cross join
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        # STRAIGHT_JOIN (MySQL) implies the JOIN keyword by itself
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed — undo the method/side/kind consumption
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # No ON/USING yet: try to absorb nested joins first, then look for
            # a trailing ON/USING that applies to this (outer) join. If neither
            # follows, roll the nested-join consumption back.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        # Bubble comments attached to the join-part tokens up to the Join node
        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
columns=columns, 3513 include=include, 3514 partition_by=partition_by, 3515 where=where, 3516 with_storage=with_storage, 3517 tablespace=tablespace, 3518 on=on, 3519 ) 3520 3521 def _parse_index( 3522 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3523 ) -> t.Optional[exp.Index]: 3524 if index or anonymous: 3525 unique = None 3526 primary = None 3527 amp = None 3528 3529 self._match(TokenType.ON) 3530 self._match(TokenType.TABLE) # hive 3531 table = self._parse_table_parts(schema=True) 3532 else: 3533 unique = self._match(TokenType.UNIQUE) 3534 primary = self._match_text_seq("PRIMARY") 3535 amp = self._match_text_seq("AMP") 3536 3537 if not self._match(TokenType.INDEX): 3538 return None 3539 3540 index = self._parse_id_var() 3541 table = None 3542 3543 params = self._parse_index_params() 3544 3545 return self.expression( 3546 exp.Index, 3547 this=index, 3548 table=table, 3549 unique=unique, 3550 primary=primary, 3551 amp=amp, 3552 params=params, 3553 ) 3554 3555 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3556 hints: t.List[exp.Expression] = [] 3557 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3558 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3559 hints.append( 3560 self.expression( 3561 exp.WithTableHint, 3562 expressions=self._parse_csv( 3563 lambda: self._parse_function() or self._parse_var(any_token=True) 3564 ), 3565 ) 3566 ) 3567 self._match_r_paren() 3568 else: 3569 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3570 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3571 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3572 3573 self._match_set((TokenType.INDEX, TokenType.KEY)) 3574 if self._match(TokenType.FOR): 3575 hint.set("target", self._advance_any() and self._prev.text.upper()) 3576 3577 hint.set("expressions", self._parse_wrapped_id_vars()) 3578 hints.append(hint) 3579 3580 return hints or None 3581 3582 def 
_parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3583 return ( 3584 (not schema and self._parse_function(optional_parens=False)) 3585 or self._parse_id_var(any_token=False) 3586 or self._parse_string_as_identifier() 3587 or self._parse_placeholder() 3588 ) 3589 3590 def _parse_table_parts( 3591 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3592 ) -> exp.Table: 3593 catalog = None 3594 db = None 3595 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3596 3597 while self._match(TokenType.DOT): 3598 if catalog: 3599 # This allows nesting the table in arbitrarily many dot expressions if needed 3600 table = self.expression( 3601 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3602 ) 3603 else: 3604 catalog = db 3605 db = table 3606 # "" used for tsql FROM a..b case 3607 table = self._parse_table_part(schema=schema) or "" 3608 3609 if ( 3610 wildcard 3611 and self._is_connected() 3612 and (isinstance(table, exp.Identifier) or not table) 3613 and self._match(TokenType.STAR) 3614 ): 3615 if isinstance(table, exp.Identifier): 3616 table.args["this"] += "*" 3617 else: 3618 table = exp.Identifier(this="*") 3619 3620 # We bubble up comments from the Identifier to the Table 3621 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3622 3623 if is_db_reference: 3624 catalog = db 3625 db = table 3626 table = None 3627 3628 if not table and not is_db_reference: 3629 self.raise_error(f"Expected table name but got {self._curr}") 3630 if not db and is_db_reference: 3631 self.raise_error(f"Expected database name but got {self._curr}") 3632 3633 table = self.expression( 3634 exp.Table, 3635 comments=comments, 3636 this=table, 3637 db=db, 3638 catalog=catalog, 3639 ) 3640 3641 changes = self._parse_changes() 3642 if changes: 3643 table.set("changes", changes) 3644 3645 at_before = self._parse_historical_data() 3646 if at_before: 3647 
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like expression: LATERAL, UNNEST, VALUES, subquery,
        ROWS FROM, or a plain (possibly qualified) table reference, together
        with its trailing decorations (version, sample, alias, hints, pivots,
        joins, ordinality). The alternatives are tried in a fixed order.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place TABLESAMPLE before the alias, others after it
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
_parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3781 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3782 index = self._index 3783 historical_data = None 3784 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3785 this = self._prev.text.upper() 3786 kind = ( 3787 self._match(TokenType.L_PAREN) 3788 and self._match_texts(self.HISTORICAL_DATA_KIND) 3789 and self._prev.text.upper() 3790 ) 3791 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3792 3793 if expression: 3794 self._match_r_paren() 3795 historical_data = self.expression( 3796 exp.HistoricalData, this=this, kind=kind, expression=expression 3797 ) 3798 else: 3799 self._retreat(index) 3800 3801 return historical_data 3802 3803 def _parse_changes(self) -> t.Optional[exp.Changes]: 3804 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3805 return None 3806 3807 information = self._parse_var(any_token=True) 3808 self._match_r_paren() 3809 3810 return self.expression( 3811 exp.Changes, 3812 information=information, 3813 at_before=self._parse_historical_data(), 3814 end=self._parse_historical_data(), 3815 ) 3816 3817 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3818 if not self._match(TokenType.UNNEST): 3819 return None 3820 3821 expressions = self._parse_wrapped_csv(self._parse_equality) 3822 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3823 3824 alias = self._parse_table_alias() if with_alias else None 3825 3826 if alias: 3827 if self.dialect.UNNEST_COLUMN_ONLY: 3828 if alias.args.get("columns"): 3829 self.raise_error("Unexpected extra column alias in unnest.") 3830 3831 alias.set("columns", [alias.this]) 3832 alias.set("this", None) 3833 3834 columns = alias.args.get("columns") or [] 3835 if offset and len(expressions) < len(columns): 3836 offset = columns.pop() 3837 3838 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3839 self._match(TokenType.ALIAS) 3840 offset = 
self._parse_id_var( 3841 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3842 ) or exp.to_identifier("offset") 3843 3844 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3845 3846 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3847 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3848 if not is_derived and not ( 3849 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3850 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3851 ): 3852 return None 3853 3854 expressions = self._parse_csv(self._parse_value) 3855 alias = self._parse_table_alias() 3856 3857 if is_derived: 3858 self._match_r_paren() 3859 3860 return self.expression( 3861 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3862 ) 3863 3864 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3865 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3866 as_modifier and self._match_text_seq("USING", "SAMPLE") 3867 ): 3868 return None 3869 3870 bucket_numerator = None 3871 bucket_denominator = None 3872 bucket_field = None 3873 percent = None 3874 size = None 3875 seed = None 3876 3877 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3878 matched_l_paren = self._match(TokenType.L_PAREN) 3879 3880 if self.TABLESAMPLE_CSV: 3881 num = None 3882 expressions = self._parse_csv(self._parse_primary) 3883 else: 3884 expressions = None 3885 num = ( 3886 self._parse_factor() 3887 if self._match(TokenType.NUMBER, advance=False) 3888 else self._parse_primary() or self._parse_placeholder() 3889 ) 3890 3891 if self._match_text_seq("BUCKET"): 3892 bucket_numerator = self._parse_number() 3893 self._match_text_seq("OUT", "OF") 3894 bucket_denominator = bucket_denominator = self._parse_number() 3895 self._match(TokenType.ON) 3896 bucket_field = self._parse_field() 3897 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3898 percent = 
num 3899 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3900 size = num 3901 else: 3902 percent = num 3903 3904 if matched_l_paren: 3905 self._match_r_paren() 3906 3907 if self._match(TokenType.L_PAREN): 3908 method = self._parse_var(upper=True) 3909 seed = self._match(TokenType.COMMA) and self._parse_number() 3910 self._match_r_paren() 3911 elif self._match_texts(("SEED", "REPEATABLE")): 3912 seed = self._parse_wrapped(self._parse_number) 3913 3914 if not method and self.DEFAULT_SAMPLING_METHOD: 3915 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3916 3917 return self.expression( 3918 exp.TableSample, 3919 expressions=expressions, 3920 method=method, 3921 bucket_numerator=bucket_numerator, 3922 bucket_denominator=bucket_denominator, 3923 bucket_field=bucket_field, 3924 percent=percent, 3925 size=size, 3926 seed=seed, 3927 ) 3928 3929 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3930 return list(iter(self._parse_pivot, None)) or None 3931 3932 def _parse_joins(self) -> t.Iterator[exp.Join]: 3933 return iter(self._parse_join, None) 3934 3935 # https://duckdb.org/docs/sql/statements/pivot 3936 def _parse_simplified_pivot(self) -> exp.Pivot: 3937 def _parse_on() -> t.Optional[exp.Expression]: 3938 this = self._parse_bitwise() 3939 return self._parse_in(this) if self._match(TokenType.IN) else this 3940 3941 this = self._parse_table() 3942 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3943 using = self._match(TokenType.USING) and self._parse_csv( 3944 lambda: self._parse_alias(self._parse_function()) 3945 ) 3946 group = self._parse_group() 3947 return self.expression( 3948 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3949 ) 3950 3951 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3952 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3953 this = self._parse_select_or_expression() 3954 3955 self._match(TokenType.ALIAS) 3956 alias = self._parse_bitwise() 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause into an `exp.Pivot`.

        For UNPIVOT, also handles Databricks' INCLUDE/EXCLUDE NULLS option.
        For PIVOT, synthesizes the output column identifiers from the
        aggregation aliases crossed with the IN-list field names.
        Returns None (with the cursor restored) when the clause is not
        actually a pivot (no opening paren after the keyword).
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT keyword without a paren — not a pivot clause; rewind
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        # An alias can only follow if another PIVOT/UNPIVOT doesn't come next
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Build the pivoted output column names: one per (aggregation, field)
            # pair, ordered per PREFIXED_PIVOT_COLUMNS
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
self._parse_csv( 4083 lambda: None 4084 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4085 else self._parse_assignment() 4086 ) 4087 ) 4088 4089 before_with_index = self._index 4090 with_prefix = self._match(TokenType.WITH) 4091 4092 if self._match(TokenType.ROLLUP): 4093 elements["rollup"].append( 4094 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4095 ) 4096 elif self._match(TokenType.CUBE): 4097 elements["cube"].append( 4098 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4099 ) 4100 elif self._match(TokenType.GROUPING_SETS): 4101 elements["grouping_sets"].append( 4102 self.expression( 4103 exp.GroupingSets, 4104 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4105 ) 4106 ) 4107 elif self._match_text_seq("TOTALS"): 4108 elements["totals"] = True # type: ignore 4109 4110 if before_with_index <= self._index <= before_with_index + 1: 4111 self._retreat(before_with_index) 4112 break 4113 4114 if index == self._index: 4115 break 4116 4117 return self.expression(exp.Group, **elements) # type: ignore 4118 4119 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4120 return self.expression( 4121 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4122 ) 4123 4124 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4125 if self._match(TokenType.L_PAREN): 4126 grouping_set = self._parse_csv(self._parse_column) 4127 self._match_r_paren() 4128 return self.expression(exp.Tuple, expressions=grouping_set) 4129 4130 return self._parse_column() 4131 4132 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4133 if not skip_having_token and not self._match(TokenType.HAVING): 4134 return None 4135 return self.expression(exp.Having, this=self._parse_assignment()) 4136 4137 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4138 if not self._match(TokenType.QUALIFY): 4139 return None 4140 return 
self.expression(exp.Qualify, this=self._parse_assignment()) 4141 4142 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4143 if skip_start_token: 4144 start = None 4145 elif self._match(TokenType.START_WITH): 4146 start = self._parse_assignment() 4147 else: 4148 return None 4149 4150 self._match(TokenType.CONNECT_BY) 4151 nocycle = self._match_text_seq("NOCYCLE") 4152 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4153 exp.Prior, this=self._parse_bitwise() 4154 ) 4155 connect = self._parse_assignment() 4156 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4157 4158 if not start and self._match(TokenType.START_WITH): 4159 start = self._parse_assignment() 4160 4161 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4162 4163 def _parse_name_as_expression(self) -> exp.Alias: 4164 return self.expression( 4165 exp.Alias, 4166 alias=self._parse_id_var(any_token=True), 4167 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4168 ) 4169 4170 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4171 if self._match_text_seq("INTERPOLATE"): 4172 return self._parse_wrapped_csv(self._parse_name_as_expression) 4173 return None 4174 4175 def _parse_order( 4176 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4177 ) -> t.Optional[exp.Expression]: 4178 siblings = None 4179 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4180 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4181 return this 4182 4183 siblings = True 4184 4185 return self.expression( 4186 exp.Order, 4187 this=this, 4188 expressions=self._parse_csv(self._parse_ordered), 4189 siblings=siblings, 4190 ) 4191 4192 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4193 if not self._match(token): 4194 return None 4195 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4196 4197 def _parse_ordered( 4198 
    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT (or TOP, when `top` is True) clause, falling back to
        ANSI FETCH FIRST/NEXT. Returns `this` unchanged when neither is
        present. Also handles MySQL's `LIMIT offset, count` comma form and a
        trailing ClickHouse-style `BY` list via `_parse_limit_by`.
        """
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP may parenthesize an arbitrary term; bare TOP takes a number
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # `LIMIT a, b` — the first term is actually the offset
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
self._match_text_seq("OF"): 4330 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4331 4332 wait: t.Optional[bool | exp.Expression] = None 4333 if self._match_text_seq("NOWAIT"): 4334 wait = True 4335 elif self._match_text_seq("WAIT"): 4336 wait = self._parse_primary() 4337 elif self._match_text_seq("SKIP", "LOCKED"): 4338 wait = False 4339 4340 locks.append( 4341 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4342 ) 4343 4344 return locks 4345 4346 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4347 while this and self._match_set(self.SET_OPERATIONS): 4348 token_type = self._prev.token_type 4349 4350 if token_type == TokenType.UNION: 4351 operation: t.Type[exp.SetOperation] = exp.Union 4352 elif token_type == TokenType.EXCEPT: 4353 operation = exp.Except 4354 else: 4355 operation = exp.Intersect 4356 4357 comments = self._prev.comments 4358 4359 if self._match(TokenType.DISTINCT): 4360 distinct: t.Optional[bool] = True 4361 elif self._match(TokenType.ALL): 4362 distinct = False 4363 else: 4364 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4365 if distinct is None: 4366 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4367 4368 by_name = self._match_text_seq("BY", "NAME") 4369 expression = self._parse_select(nested=True, parse_set_operation=False) 4370 4371 this = self.expression( 4372 operation, 4373 comments=comments, 4374 this=this, 4375 distinct=distinct, 4376 by_name=by_name, 4377 expression=expression, 4378 ) 4379 4380 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4381 expression = this.expression 4382 4383 if expression: 4384 for arg in self.SET_OP_MODIFIERS: 4385 expr = expression.args.get(arg) 4386 if expr: 4387 this.set(arg, expr.pop()) 4388 4389 return this 4390 4391 def _parse_expression(self) -> t.Optional[exp.Expression]: 4392 return self._parse_alias(self._parse_assignment()) 4393 
    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse an assignment expression (e.g. `x := <expr>`), recursing on the RHS.

        Falls through to the boolean-operator precedence chain
        (disjunction -> conjunction -> equality -> comparison -> range) when
        no assignment operator is present.
        """
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            # A single-part column on the LHS of := is really an identifier,
            # so unwrap it before building the assignment node.
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                # Recursing makes assignment right-associative.
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of OR-level operators."""
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of AND-level operators."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of equality operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of comparison operators (<, >=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates on `this`: BETWEEN/IN/LIKE-style operators
        registered in RANGE_PARSERS, the ISNULL/NOTNULL shorthands, and IS.

        An optional leading NOT negates the parsed range predicate.
        """
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The registered parser declined; keep the operand unchanged.
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Wrap a parsed range predicate in NOT; no-op for a missing operand."""
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: IS [NOT] DISTINCT FROM,
        IS [NOT] JSON [<kind>] [WITH|WITHOUT UNIQUE [KEYS]], or IS <literal|NULL>.

        Retreats and returns None when nothing valid follows IS, so the caller
        can reinterpret the token stream.
        """
        # Remember the position of the IS token itself so we can retreat past it.
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM is null-safe equality, IS DISTINCT FROM the inverse.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of an IN predicate: IN UNNEST(...), IN (<list|subquery>),
        IN [<list>] (bracket variant), or IN <column>.
        """
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                # A lone query becomes the subquery form: x IN (SELECT ...)
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            # Close with the bracket that matches the opener we consumed.
            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse the `<low> AND <high>` tail of a BETWEEN predicate."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Attach an optional ESCAPE '<char>' clause to a LIKE-style predicate."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression, canonicalizing to the `INTERVAL '<n>' <unit>`
        form, and folding `INTERVAL 'a' x 'b' y ...` chains into a sum of intervals.

        When `match_interval` is False the INTERVAL keyword itself is not
        required (used for the continuation of a chained interval sum).
        Retreats and returns None when what follows cannot be an interval.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare `interval is ...` was a column named "interval", not the keyword.
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            # e.g. INTERVAL '1-2' YEAR TO MONTH
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of bitwise-level operators, plus the
        dialect-dependent `||` (string concat), `??` (coalesce) and `<<`/`>>`
        shift operators.
        """
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # x ?? y  -->  COALESCE(x, y)
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of additive-level operators (self.TERM),
        normalizing COLLATE arguments afterwards.
        """
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Collate):
            expr = this.expression

            # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
            # fallback to Identifier / Var
            if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                ident = expr.this
                if isinstance(ident, exp.Identifier):
                    this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of multiplicative-level operators
        (self.FACTOR), annotating divisions with the dialect's semantics.
        """
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word-operator like DIV with no RHS was actually an identifier; back off.
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                # Record dialect division semantics for faithful transpilation.
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse a chain of exponentiation operators (self.EXPONENT)."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a prefix unary operator, or fall through to a typed/primary
        expression with an optional AT TIME ZONE suffix.
        """
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an expression that may start with a data type: an interval,
        a typed literal (`DATE '...'` -> Cast or a TYPE_LITERAL_PARSERS node),
        an inline constructor, or - failing all of those - a column.

        Args:
            parse_interval: whether to first try parsing an INTERVAL expression.
            fallback_to_identifier: on failure, parse a bare identifier instead
                of a full column expression.
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                # Typed literal, e.g. DATE '2020-01-01'
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single data type parameter, e.g. the `38` in DECIMAL(38, 0)
        or a sized VARCHAR argument, wrapping it in a DataTypeParam.
        """
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            # A bare word used as a size parameter is a Var, not a column.
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, covering user-defined types, MAP[k => v], nested
        STRUCT/ARRAY (<...> and (...) forms), ENUM, aggregate-state types,
        sized types, timezone-qualified timestamps, INTERVAL units, LIST and
        trailing []-style array suffixes.

        Args:
            check_func: require a following string literal before accepting a
                parenthesized type that might otherwise be a function call.
            schema: parsing inside a schema definition (affects whether
                bracketed values are treated as fixed array sizes).
            allow_identifiers: allow an identifier to be re-tokenized as a type.

        Returns None (after retreating) when no type could be parsed.
        """
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier to see if it is actually a type keyword.
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    # Nullable(T) marks the inner type instead of wrapping it.
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(func, types...)
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A parenthesized form may still turn out to be a function call.
            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # BigQuery-style inline constructor values, e.g. ARRAY<INT>[1, 2]
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal: treat the parenthesized form as a
                # function call, not a type.
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: `<name> [:] <type>` (or a bare type), returning
        a column definition.

        Args:
            type_required: retreat and reparse as a plain type when what was
                parsed is not a DataType and no type token follows.
        """
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Attach an optional AT TIME ZONE <expr> suffix to `this`."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted/bracketed) column expression, including the
        Oracle-style (+) join marker when the dialect supports it.
        """
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse the base of a column reference, allowing VALUES as a name where
        the dialect permits it, and wrapping identifiers into Column nodes.
        """
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `col:path.to[field]` VARIANT extraction into
        a JSONExtract, hoisting any `::type` casts so they apply to the result
        rather than to the JSON path.
        """
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # Recover the raw path text up to (but excluding) the :: token.
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the type on the right-hand side of a `::` cast."""
        return self._parse_types()
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold postfix column operators onto `this`: `::` casts, dots and other
        COLUMN_OPERATORS, brackets, and (when the dialect says so) `:` VARIANT
        extraction.
        """
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the name parts left: what we had as the column becomes
                # the table, the table becomes the db, etc.
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with implicit adjacent-string
        concatenation), a leading-dot number (`.5`), or a parenthesized
        expression/tuple/subquery.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                # () is an empty tuple.
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier.

        `anonymous_func` flips the primary/function precedence so that an
        anonymous function call is preferred over a bare literal.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)
    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, optionally wrapped in ODBC-style `{fn ...}`.

        Args:
            functions: name -> builder overrides (defaults to self.FUNCTIONS).
            anonymous: force an Anonymous node even for known functions.
            optional_parens: allow paren-less functions (e.g. CURRENT_DATE).
            any_token: allow reserved-word-ish tokens as function names.
        """
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse the function call itself (see _parse_function for the args).

        Resolution order: NO_PAREN_FUNCTION_PARSERS, paren-less
        NO_PAREN_FUNCTIONS, FUNCTION_PARSERS, subquery predicates
        (EXISTS/ANY/...), then known FUNCTIONS or an Anonymous call.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Builders that accept a `dialect` kwarg get it passed through.
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original casing of the function name.
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        """Hook for dialects to turn a positional argument into a PropertyEQ;
        the base parser leaves it unchanged.
        """
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize aliased / key-value style function arguments into
        PropertyEQ nodes (e.g. `x => 1`, `x AS 1` -> x := 1).
        """
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # The key is a name, not a column reference.
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function definition."""
        return self._parse_column_def(self._parse_id_var())
    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parenthesized
        parameter list; returns just the name when no parens follow.
        """
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an
        Identifier when no literal follows.
        """
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as
        `<kind>.<name>`.
        """
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda parameter (an identifier by default)."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (`x -> ...` or `(x, y) -> ...`); when no lambda
        operator follows, retreat and parse a DISTINCT list or a regular
        select/expression argument with its trailing modifiers.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse an ordinary argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to
        `this`; returns `this` unchanged when no schema follows or the parens
        actually start a subquery/CTE.
        """
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one schema field: a name followed by its column definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the definition tail of a column: its type, computed/transform
        constraints (ALIAS / MATERIALIZED), and any trailing column constraints.

        Returns `this` untouched when neither a type nor constraints follow.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. ClickHouse's `x ALIAS expr` / `x MATERIALIZED expr`.
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_field()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
kind=exp.TransformColumnConstraint(this=self._parse_field()), 5455 ) 5456 ) 5457 5458 while True: 5459 constraint = self._parse_column_constraint() 5460 if not constraint: 5461 break 5462 constraints.append(constraint) 5463 5464 if not kind and not constraints: 5465 return this 5466 5467 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5468 5469 def _parse_auto_increment( 5470 self, 5471 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5472 start = None 5473 increment = None 5474 5475 if self._match(TokenType.L_PAREN, advance=False): 5476 args = self._parse_wrapped_csv(self._parse_bitwise) 5477 start = seq_get(args, 0) 5478 increment = seq_get(args, 1) 5479 elif self._match_text_seq("START"): 5480 start = self._parse_bitwise() 5481 self._match_text_seq("INCREMENT") 5482 increment = self._parse_bitwise() 5483 5484 if start and increment: 5485 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5486 5487 return exp.AutoIncrementColumnConstraint() 5488 5489 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5490 if not self._match_text_seq("REFRESH"): 5491 self._retreat(self._index - 1) 5492 return None 5493 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5494 5495 def _parse_compress(self) -> exp.CompressColumnConstraint: 5496 if self._match(TokenType.L_PAREN, advance=False): 5497 return self.expression( 5498 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5499 ) 5500 5501 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5502 5503 def _parse_generated_as_identity( 5504 self, 5505 ) -> ( 5506 exp.GeneratedAsIdentityColumnConstraint 5507 | exp.ComputedColumnConstraint 5508 | exp.GeneratedAsRowColumnConstraint 5509 ): 5510 if self._match_text_seq("BY", "DEFAULT"): 5511 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5512 this = self.expression( 5513 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5514 ) 5515 else: 5516 self._match_text_seq("ALWAYS") 5517 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5518 5519 self._match(TokenType.ALIAS) 5520 5521 if self._match_text_seq("ROW"): 5522 start = self._match_text_seq("START") 5523 if not start: 5524 self._match(TokenType.END) 5525 hidden = self._match_text_seq("HIDDEN") 5526 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5527 5528 identity = self._match_text_seq("IDENTITY") 5529 5530 if self._match(TokenType.L_PAREN): 5531 if self._match(TokenType.START_WITH): 5532 this.set("start", self._parse_bitwise()) 5533 if self._match_text_seq("INCREMENT", "BY"): 5534 this.set("increment", self._parse_bitwise()) 5535 if self._match_text_seq("MINVALUE"): 5536 this.set("minvalue", self._parse_bitwise()) 5537 if self._match_text_seq("MAXVALUE"): 5538 this.set("maxvalue", self._parse_bitwise()) 5539 5540 if self._match_text_seq("CYCLE"): 5541 this.set("cycle", True) 5542 elif self._match_text_seq("NO", "CYCLE"): 5543 this.set("cycle", False) 5544 5545 if not identity: 5546 this.set("expression", self._parse_range()) 5547 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5548 args = self._parse_csv(self._parse_bitwise) 5549 this.set("start", seq_get(args, 0)) 5550 this.set("increment", seq_get(args, 1)) 5551 5552 self._match_r_paren() 5553 5554 return this 5555 5556 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5557 self._match_text_seq("LENGTH") 5558 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5559 5560 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5561 if self._match_text_seq("NULL"): 5562 return self.expression(exp.NotNullColumnConstraint) 5563 if self._match_text_seq("CASESPECIFIC"): 5564 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5565 if 
self._match_text_seq("FOR", "REPLICATION"): 5566 return self.expression(exp.NotForReplicationColumnConstraint) 5567 5568 # Unconsume the `NOT` token 5569 self._retreat(self._index - 1) 5570 return None 5571 5572 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5573 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5574 5575 procedure_option_follows = ( 5576 self._match(TokenType.WITH, advance=False) 5577 and self._next 5578 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5579 ) 5580 5581 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5582 return self.expression( 5583 exp.ColumnConstraint, 5584 this=this, 5585 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5586 ) 5587 5588 return this 5589 5590 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5591 if not self._match(TokenType.CONSTRAINT): 5592 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5593 5594 return self.expression( 5595 exp.Constraint, 5596 this=self._parse_id_var(), 5597 expressions=self._parse_unnamed_constraints(), 5598 ) 5599 5600 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5601 constraints = [] 5602 while True: 5603 constraint = self._parse_unnamed_constraint() or self._parse_function() 5604 if not constraint: 5605 break 5606 constraints.append(constraint) 5607 5608 return constraints 5609 5610 def _parse_unnamed_constraint( 5611 self, constraints: t.Optional[t.Collection[str]] = None 5612 ) -> t.Optional[exp.Expression]: 5613 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5614 constraints or self.CONSTRAINT_PARSERS 5615 ): 5616 return None 5617 5618 constraint = self._prev.text.upper() 5619 if constraint not in self.CONSTRAINT_PARSERS: 5620 self.raise_error(f"No parser found for schema constraint {constraint}.") 5621 5622 return self.CONSTRAINT_PARSERS[constraint](self) 5623 5624 def _parse_unique_key(self) -> 
t.Optional[exp.Expression]: 5625 return self._parse_id_var(any_token=False) 5626 5627 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5628 self._match_text_seq("KEY") 5629 return self.expression( 5630 exp.UniqueColumnConstraint, 5631 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5632 this=self._parse_schema(self._parse_unique_key()), 5633 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5634 on_conflict=self._parse_on_conflict(), 5635 ) 5636 5637 def _parse_key_constraint_options(self) -> t.List[str]: 5638 options = [] 5639 while True: 5640 if not self._curr: 5641 break 5642 5643 if self._match(TokenType.ON): 5644 action = None 5645 on = self._advance_any() and self._prev.text 5646 5647 if self._match_text_seq("NO", "ACTION"): 5648 action = "NO ACTION" 5649 elif self._match_text_seq("CASCADE"): 5650 action = "CASCADE" 5651 elif self._match_text_seq("RESTRICT"): 5652 action = "RESTRICT" 5653 elif self._match_pair(TokenType.SET, TokenType.NULL): 5654 action = "SET NULL" 5655 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5656 action = "SET DEFAULT" 5657 else: 5658 self.raise_error("Invalid key constraint") 5659 5660 options.append(f"ON {on} {action}") 5661 else: 5662 var = self._parse_var_from_options( 5663 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5664 ) 5665 if not var: 5666 break 5667 options.append(var.name) 5668 5669 return options 5670 5671 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5672 if match and not self._match(TokenType.REFERENCES): 5673 return None 5674 5675 expressions = None 5676 this = self._parse_table(schema=True) 5677 options = self._parse_key_constraint_options() 5678 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5679 5680 def _parse_foreign_key(self) -> exp.ForeignKey: 5681 expressions = self._parse_wrapped_id_vars() 5682 reference = self._parse_references() 5683 options = {} 5684 5685 while 
self._match(TokenType.ON): 5686 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5687 self.raise_error("Expected DELETE or UPDATE") 5688 5689 kind = self._prev.text.lower() 5690 5691 if self._match_text_seq("NO", "ACTION"): 5692 action = "NO ACTION" 5693 elif self._match(TokenType.SET): 5694 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5695 action = "SET " + self._prev.text.upper() 5696 else: 5697 self._advance() 5698 action = self._prev.text.upper() 5699 5700 options[kind] = action 5701 5702 return self.expression( 5703 exp.ForeignKey, 5704 expressions=expressions, 5705 reference=reference, 5706 **options, # type: ignore 5707 ) 5708 5709 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5710 return self._parse_field() 5711 5712 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5713 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5714 self._retreat(self._index - 1) 5715 return None 5716 5717 id_vars = self._parse_wrapped_id_vars() 5718 return self.expression( 5719 exp.PeriodForSystemTimeConstraint, 5720 this=seq_get(id_vars, 0), 5721 expression=seq_get(id_vars, 1), 5722 ) 5723 5724 def _parse_primary_key( 5725 self, wrapped_optional: bool = False, in_props: bool = False 5726 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5727 desc = ( 5728 self._match_set((TokenType.ASC, TokenType.DESC)) 5729 and self._prev.token_type == TokenType.DESC 5730 ) 5731 5732 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5733 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5734 5735 expressions = self._parse_wrapped_csv( 5736 self._parse_primary_key_part, optional=wrapped_optional 5737 ) 5738 options = self._parse_key_constraint_options() 5739 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5740 5741 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5742 return 
self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5743 5744 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5745 """ 5746 Parses a datetime column in ODBC format. We parse the column into the corresponding 5747 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5748 same as we did for `DATE('yyyy-mm-dd')`. 5749 5750 Reference: 5751 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5752 """ 5753 self._match(TokenType.VAR) 5754 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5755 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5756 if not self._match(TokenType.R_BRACE): 5757 self.raise_error("Expected }") 5758 return expression 5759 5760 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5761 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5762 return this 5763 5764 bracket_kind = self._prev.token_type 5765 if ( 5766 bracket_kind == TokenType.L_BRACE 5767 and self._curr 5768 and self._curr.token_type == TokenType.VAR 5769 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5770 ): 5771 return self._parse_odbc_datetime_literal() 5772 5773 expressions = self._parse_csv( 5774 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5775 ) 5776 5777 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5778 self.raise_error("Expected ]") 5779 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5780 self.raise_error("Expected }") 5781 5782 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5783 if bracket_kind == TokenType.L_BRACE: 5784 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5785 elif not this: 5786 this = build_array_constructor( 5787 exp.Array, args=expressions, bracket_kind=bracket_kind, 
dialect=self.dialect 5788 ) 5789 else: 5790 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5791 if constructor_type: 5792 return build_array_constructor( 5793 constructor_type, 5794 args=expressions, 5795 bracket_kind=bracket_kind, 5796 dialect=self.dialect, 5797 ) 5798 5799 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5800 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5801 5802 self._add_comments(this) 5803 return self._parse_bracket(this) 5804 5805 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5806 if self._match(TokenType.COLON): 5807 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5808 return this 5809 5810 def _parse_case(self) -> t.Optional[exp.Expression]: 5811 ifs = [] 5812 default = None 5813 5814 comments = self._prev_comments 5815 expression = self._parse_assignment() 5816 5817 while self._match(TokenType.WHEN): 5818 this = self._parse_assignment() 5819 self._match(TokenType.THEN) 5820 then = self._parse_assignment() 5821 ifs.append(self.expression(exp.If, this=this, true=then)) 5822 5823 if self._match(TokenType.ELSE): 5824 default = self._parse_assignment() 5825 5826 if not self._match(TokenType.END): 5827 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5828 default = exp.column("interval") 5829 else: 5830 self.raise_error("Expected END after CASE", self._prev) 5831 5832 return self.expression( 5833 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5834 ) 5835 5836 def _parse_if(self) -> t.Optional[exp.Expression]: 5837 if self._match(TokenType.L_PAREN): 5838 args = self._parse_csv(self._parse_assignment) 5839 this = self.validate_expression(exp.If.from_arg_list(args), args) 5840 self._match_r_paren() 5841 else: 5842 index = self._index - 1 5843 5844 if self.NO_PAREN_IF_COMMANDS and index == 0: 5845 return self._parse_as_command(self._prev) 5846 
5847 condition = self._parse_assignment() 5848 5849 if not condition: 5850 self._retreat(index) 5851 return None 5852 5853 self._match(TokenType.THEN) 5854 true = self._parse_assignment() 5855 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5856 self._match(TokenType.END) 5857 this = self.expression(exp.If, this=condition, true=true, false=false) 5858 5859 return this 5860 5861 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5862 if not self._match_text_seq("VALUE", "FOR"): 5863 self._retreat(self._index - 1) 5864 return None 5865 5866 return self.expression( 5867 exp.NextValueFor, 5868 this=self._parse_column(), 5869 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5870 ) 5871 5872 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5873 this = self._parse_function() or self._parse_var_or_string(upper=True) 5874 5875 if self._match(TokenType.FROM): 5876 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5877 5878 if not self._match(TokenType.COMMA): 5879 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5880 5881 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5882 5883 def _parse_gap_fill(self) -> exp.GapFill: 5884 self._match(TokenType.TABLE) 5885 this = self._parse_table() 5886 5887 self._match(TokenType.COMMA) 5888 args = [this, *self._parse_csv(self._parse_lambda)] 5889 5890 gap_fill = exp.GapFill.from_arg_list(args) 5891 return self.validate_expression(gap_fill, args) 5892 5893 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5894 this = self._parse_assignment() 5895 5896 if not self._match(TokenType.ALIAS): 5897 if self._match(TokenType.COMMA): 5898 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5899 5900 self.raise_error("Expected AS after CAST") 5901 5902 fmt = None 5903 to = self._parse_types() 5904 5905 if 
self._match(TokenType.FORMAT): 5906 fmt_string = self._parse_string() 5907 fmt = self._parse_at_time_zone(fmt_string) 5908 5909 if not to: 5910 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5911 if to.this in exp.DataType.TEMPORAL_TYPES: 5912 this = self.expression( 5913 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5914 this=this, 5915 format=exp.Literal.string( 5916 format_time( 5917 fmt_string.this if fmt_string else "", 5918 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5919 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5920 ) 5921 ), 5922 safe=safe, 5923 ) 5924 5925 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5926 this.set("zone", fmt.args["zone"]) 5927 return this 5928 elif not to: 5929 self.raise_error("Expected TYPE after CAST") 5930 elif isinstance(to, exp.Identifier): 5931 to = exp.DataType.build(to.name, udt=True) 5932 elif to.this == exp.DataType.Type.CHAR: 5933 if self._match(TokenType.CHARACTER_SET): 5934 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5935 5936 return self.expression( 5937 exp.Cast if strict else exp.TryCast, 5938 this=this, 5939 to=to, 5940 format=fmt, 5941 safe=safe, 5942 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5943 ) 5944 5945 def _parse_string_agg(self) -> exp.Expression: 5946 if self._match(TokenType.DISTINCT): 5947 args: t.List[t.Optional[exp.Expression]] = [ 5948 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5949 ] 5950 if self._match(TokenType.COMMA): 5951 args.extend(self._parse_csv(self._parse_assignment)) 5952 else: 5953 args = self._parse_csv(self._parse_assignment) # type: ignore 5954 5955 index = self._index 5956 if not self._match(TokenType.R_PAREN) and args: 5957 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5958 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, 
... ]] [LIMIT n]) 5959 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5960 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5961 5962 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5963 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5964 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5965 if not self._match_text_seq("WITHIN", "GROUP"): 5966 self._retreat(index) 5967 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5968 5969 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5970 order = self._parse_order(this=seq_get(args, 0)) 5971 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5972 5973 def _parse_convert( 5974 self, strict: bool, safe: t.Optional[bool] = None 5975 ) -> t.Optional[exp.Expression]: 5976 this = self._parse_bitwise() 5977 5978 if self._match(TokenType.USING): 5979 to: t.Optional[exp.Expression] = self.expression( 5980 exp.CharacterSet, this=self._parse_var() 5981 ) 5982 elif self._match(TokenType.COMMA): 5983 to = self._parse_types() 5984 else: 5985 to = None 5986 5987 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5988 5989 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5990 """ 5991 There are generally two variants of the DECODE function: 5992 5993 - DECODE(bin, charset) 5994 - DECODE(expression, search, result [, search, result] ... [, default]) 5995 5996 The second variant will always be parsed into a CASE expression. Note that NULL 5997 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5998 instead of relying on pattern matching. 
5999 """ 6000 args = self._parse_csv(self._parse_assignment) 6001 6002 if len(args) < 3: 6003 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6004 6005 expression, *expressions = args 6006 if not expression: 6007 return None 6008 6009 ifs = [] 6010 for search, result in zip(expressions[::2], expressions[1::2]): 6011 if not search or not result: 6012 return None 6013 6014 if isinstance(search, exp.Literal): 6015 ifs.append( 6016 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6017 ) 6018 elif isinstance(search, exp.Null): 6019 ifs.append( 6020 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6021 ) 6022 else: 6023 cond = exp.or_( 6024 exp.EQ(this=expression.copy(), expression=search), 6025 exp.and_( 6026 exp.Is(this=expression.copy(), expression=exp.Null()), 6027 exp.Is(this=search.copy(), expression=exp.Null()), 6028 copy=False, 6029 ), 6030 copy=False, 6031 ) 6032 ifs.append(exp.If(this=cond, true=result)) 6033 6034 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6035 6036 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6037 self._match_text_seq("KEY") 6038 key = self._parse_column() 6039 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6040 self._match_text_seq("VALUE") 6041 value = self._parse_bitwise() 6042 6043 if not key and not value: 6044 return None 6045 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6046 6047 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6048 if not this or not self._match_text_seq("FORMAT", "JSON"): 6049 return this 6050 6051 return self.expression(exp.FormatJson, this=this) 6052 6053 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6054 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6055 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6056 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6057 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6058 else: 6059 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6060 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6061 6062 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6063 6064 if not empty and not error and not null: 6065 return None 6066 6067 return self.expression( 6068 exp.OnCondition, 6069 empty=empty, 6070 error=error, 6071 null=null, 6072 ) 6073 6074 def _parse_on_handling( 6075 self, on: str, *values: str 6076 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6077 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6078 for value in values: 6079 if self._match_text_seq(value, "ON", on): 6080 return f"{value} ON {on}" 6081 6082 index = self._index 6083 if self._match(TokenType.DEFAULT): 6084 default_value = self._parse_bitwise() 6085 if self._match_text_seq("ON", on): 6086 return default_value 6087 6088 self._retreat(index) 6089 6090 return None 6091 6092 @t.overload 6093 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6094 6095 @t.overload 6096 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6097 6098 def _parse_json_object(self, agg=False): 6099 star = self._parse_star() 6100 expressions = ( 6101 [star] 6102 if star 6103 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6104 ) 6105 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6106 6107 unique_keys = None 6108 if self._match_text_seq("WITH", "UNIQUE"): 6109 unique_keys = True 6110 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6111 unique_keys = False 6112 6113 self._match_text_seq("KEYS") 6114 6115 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6116 self._parse_type() 6117 ) 6118 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6119 6120 return self.expression( 6121 exp.JSONObjectAgg if agg else exp.JSONObject, 6122 expressions=expressions, 6123 null_handling=null_handling, 6124 unique_keys=unique_keys, 6125 return_type=return_type, 6126 encoding=encoding, 6127 ) 6128 6129 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6130 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6131 if not self._match_text_seq("NESTED"): 6132 this = self._parse_id_var() 6133 kind = self._parse_types(allow_identifiers=False) 6134 nested = None 6135 else: 6136 this = None 6137 kind = None 6138 nested = True 6139 6140 path = self._match_text_seq("PATH") and self._parse_string() 6141 nested_schema = nested and self._parse_json_schema() 6142 6143 return self.expression( 6144 exp.JSONColumnDef, 6145 this=this, 6146 kind=kind, 6147 path=path, 6148 nested_schema=nested_schema, 6149 ) 6150 6151 def _parse_json_schema(self) -> exp.JSONSchema: 6152 self._match_text_seq("COLUMNS") 6153 return self.expression( 6154 exp.JSONSchema, 6155 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6156 ) 6157 6158 def _parse_json_table(self) -> exp.JSONTable: 6159 this = self._parse_format_json(self._parse_bitwise()) 6160 path = self._match(TokenType.COMMA) and 
self._parse_string() 6161 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6162 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6163 schema = self._parse_json_schema() 6164 6165 return exp.JSONTable( 6166 this=this, 6167 schema=schema, 6168 path=path, 6169 error_handling=error_handling, 6170 empty_handling=empty_handling, 6171 ) 6172 6173 def _parse_match_against(self) -> exp.MatchAgainst: 6174 expressions = self._parse_csv(self._parse_column) 6175 6176 self._match_text_seq(")", "AGAINST", "(") 6177 6178 this = self._parse_string() 6179 6180 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6181 modifier = "IN NATURAL LANGUAGE MODE" 6182 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6183 modifier = f"{modifier} WITH QUERY EXPANSION" 6184 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6185 modifier = "IN BOOLEAN MODE" 6186 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6187 modifier = "WITH QUERY EXPANSION" 6188 else: 6189 modifier = None 6190 6191 return self.expression( 6192 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6193 ) 6194 6195 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6196 def _parse_open_json(self) -> exp.OpenJSON: 6197 this = self._parse_bitwise() 6198 path = self._match(TokenType.COMMA) and self._parse_string() 6199 6200 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6201 this = self._parse_field(any_token=True) 6202 kind = self._parse_types() 6203 path = self._parse_string() 6204 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6205 6206 return self.expression( 6207 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6208 ) 6209 6210 expressions = None 6211 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6212 self._match_l_paren() 6213 expressions = self._parse_csv(_parse_open_json_column_def) 6214 6215 return self.expression(exp.OpenJSON, 
this=this, path=path, expressions=expressions) 6216 6217 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6218 args = self._parse_csv(self._parse_bitwise) 6219 6220 if self._match(TokenType.IN): 6221 return self.expression( 6222 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6223 ) 6224 6225 if haystack_first: 6226 haystack = seq_get(args, 0) 6227 needle = seq_get(args, 1) 6228 else: 6229 needle = seq_get(args, 0) 6230 haystack = seq_get(args, 1) 6231 6232 return self.expression( 6233 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6234 ) 6235 6236 def _parse_predict(self) -> exp.Predict: 6237 self._match_text_seq("MODEL") 6238 this = self._parse_table() 6239 6240 self._match(TokenType.COMMA) 6241 self._match_text_seq("TABLE") 6242 6243 return self.expression( 6244 exp.Predict, 6245 this=this, 6246 expression=self._parse_table(), 6247 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6248 ) 6249 6250 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6251 args = self._parse_csv(self._parse_table) 6252 return exp.JoinHint(this=func_name.upper(), expressions=args) 6253 6254 def _parse_substring(self) -> exp.Substring: 6255 # Postgres supports the form: substring(string [from int] [for int]) 6256 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6257 6258 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6259 6260 if self._match(TokenType.FROM): 6261 args.append(self._parse_bitwise()) 6262 if self._match(TokenType.FOR): 6263 if len(args) == 1: 6264 args.append(exp.Literal.number(1)) 6265 args.append(self._parse_bitwise()) 6266 6267 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6268 6269 def _parse_trim(self) -> exp.Trim: 6270 # https://www.w3resource.com/sql/character-functions/trim.php 6271 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6272 6273 position = None 
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or when the dialect puts the pattern first), the first
            # operand parsed was the trim characters, not the target string
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause: WINDOW name AS (...), name AS (...), ..."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse a single named window definition (name AS (spec))."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a HAVING MAX/MIN qualifier into exp.HavingMax (max=True for MAX)."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse FILTER/WITHIN GROUP/IGNORE NULLS/OVER (...) following a function.

        Args:
            this: the function expression the window modifies.
            alias: True when parsing a named window (WINDOW x AS (...)), in which
                case there is no OVER keyword.
        """
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Re-root the IGNORE/RESPECT NULLS wrapper at the top of the tree
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (no parenthesized spec)
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse PARTITION BY and ORDER BY inside a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window frame bound: UNBOUNDED | CURRENT ROW | <expr>, plus side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias ([AS] name or (a, b, ...)) following `this`.

        Args:
            explicit: if True, only accept an alias introduced by the AS keyword.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into exp.Identifier.

        Falls back to consuming any non-reserved token (any_token=True) or a
        token from `tokens`/ID_VAR_TOKENS.
        """
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal via STRING_PARSERS, else try a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Consume a string token and turn it into a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal via NUMERIC_PARSERS, else try a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, else try a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any token / one of `tokens`) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Advance over the current token unless it is reserved; return it."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a var from any token."""
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression, falling back to a var from any token."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, else try a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE literals, else try a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a * token, else try a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter name (identifier, primary, or var)."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token; rewinds if its parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The placeholder parser declined — give the token back
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse a star modifier such as EXCEPT/REPLACE/RENAME (...) after *."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, dropping Nones."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []
        while self._match(sep):
            # Attach any comments trailing the separator to the previous item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: token types in `expressions` map to nodes."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside (...); parens are required unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated expression list."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, else an expression/assignment with set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [modes, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Each mode is a run of VAR tokens; modes are comma-separated
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT x] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <string | table>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse DROP [COLUMN] ..., defaulting the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse the partition list of ALTER TABLE ... DROP PARTITION."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD action of ALTER TABLE (constraints or columns)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse the ALTER action of ALTER TABLE (ALTER [COLUMN] ...)."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Default: ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse ALTER DISTSTYLE ALL|EVEN|AUTO or KEY DISTKEY <column>."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse ALTER [COMPOUND] SORTKEY (cols) | AUTO | NONE."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action of ALTER TABLE (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        """Parse RENAME [COLUMN old TO new | TO new_table]."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the SET action of ALTER TABLE (dialect-specific options)."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement; falls back to a raw Command when unsupported."""
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only accept the parse if all tokens were consumed
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON condition WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] ... THEN clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via SHOW_PARSERS; falls back to a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single SET item of the form `name = value` or `name TO value`."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not a valid assignment — rewind so the caller can try something else
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via SET_PARSERS, defaulting to a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; falls back to a raw Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-keyword) option from `options` into exp.Var.

        Each entry maps a leading keyword to its allowed continuation sequences.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched; if one was required, fail or rewind
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL in exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property: (KIND(key value, ...)) into exp.DictProperty."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range: (MIN x MAX y) or (MAX y), min defaulting to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `expr IN iterator [IF condition]`; rewinds if IN is absent."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a dollar-quoted heredoc ($tag$ ... $tag$) into exp.Heredoc."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            # Tagged form: $tag$ — expect the closing $ right after the tag
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Look up a multi-word keyword parser via the trie; rewinds on failure."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Match the current token against `token_type`, optionally consuming it."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Match the current token against a set of token types."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match the current and next tokens as a pair, consuming both on success."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ( token, raising a ParseError otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ) token, raising a ParseError otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Match the current (non-string) token's upper-cased text against `texts`."""
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of keyword texts; rewinds completely on any mismatch."""
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite lambda-parameter columns in `node` to identifiers/dots, casting
        them when the parameter carries a type (falsy value means untyped)."""
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE|DATABASE] ...; falls back to a function call or Command."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse an options list of the form [=] (key = value, ...)."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse the option list of a COPY statement into exp.CopyParameter nodes."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options
# Parser methods for COPY, GRANT, star qualifiers, NORMALIZE and OVERLAY.
    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse the credential-related clauses of a COPY statement.

        Handles both Snowflake-style (STORAGE_INTEGRATION =, CREDENTIALS = (...),
        ENCRYPTION (...)) and Redshift-style (CREDENTIALS '<string>', IAM_ROLE,
        REGION) spellings; absent clauses simply leave their args unset.
        """
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Hook point: dialects can override how a COPY file location is parsed.
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse a COPY statement, falling back to a raw exp.Command.

        If any tokens remain after the recognized clauses, the whole statement
        is preserved as an opaque Command instead of a structured Copy node.
        """
        start = self._prev

        self._match(TokenType.INTO)

        # A parenthesized target is a subquery (e.g. COPY (SELECT ...) TO ...),
        # otherwise it's a table reference.
        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY ... FROM (load) and False for COPY ... TO (unload).
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        """Parse NORMALIZE(<expr> [, <form>])."""
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            # The normalization form is only present after a comma.
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        """Parse star expressions: COLUMNS(...) unpacking or "*" with
        EXCEPT/EXCLUDE, REPLACE and RENAME qualifiers."""
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                # Mark that the COLUMNS(...) result is unpacked in star position.
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        """Parse one privilege (possibly multi-word, e.g. "ALL PRIVILEGES")
        with an optional parenthesized column list."""
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        """Parse a grantee: an optional ROLE/GROUP keyword followed by a name.

        Returns None when no identifier follows, so callers can bail out.
        """
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        """Parse a GRANT statement, falling back to a raw exp.Command whenever
        the statement can't be fully structured."""
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        # Leftover tokens mean an unsupported variant; keep the raw command.
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        """Parse OVERLAY(<this> PLACING <expr> FROM <pos> [FOR <len>])."""
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
Parser consumes a list of tokens produced by the Tokenizer and produces parsed syntax trees, one tree per SQL statement.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """Initialize the parser's error-handling settings and dialect.

        Args:
            error_level: The desired error level; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: The amount of context to capture from a query
                string when displaying the error message (in number of characters).
            max_errors: Maximum number of error messages to include in a raised
                ParseError. Only relevant if error_level is ErrorLevel.RAISE.
            dialect: The dialect to parse with (name, class or instance).
        """
        # Local import — presumably to avoid a circular dependency between
        # sqlglot.parser and sqlglot.dialects at module load time.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        # Normalize whatever was passed into a concrete Dialect instance.
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
1397 def parse( 1398 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1399 ) -> t.List[t.Optional[exp.Expression]]: 1400 """ 1401 Parses a list of tokens and returns a list of syntax trees, one tree 1402 per parsed SQL statement. 1403 1404 Args: 1405 raw_tokens: The list of tokens. 1406 sql: The original SQL string, used to produce helpful debug messages. 1407 1408 Returns: 1409 The list of the produced syntax trees. 1410 """ 1411 return self._parse( 1412 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1413 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1415 def parse_into( 1416 self, 1417 expression_types: exp.IntoType, 1418 raw_tokens: t.List[Token], 1419 sql: t.Optional[str] = None, 1420 ) -> t.List[t.Optional[exp.Expression]]: 1421 """ 1422 Parses a list of tokens into a given Expression type. If a collection of Expression 1423 types is given instead, this method will try to parse the token list into each one 1424 of them, stopping at the first for which the parsing succeeds. 1425 1426 Args: 1427 expression_types: The expression type(s) to try and parse the token list into. 1428 raw_tokens: The list of tokens. 1429 sql: The original SQL string, used to produce helpful debug messages. 1430 1431 Returns: 1432 The target Expression. 1433 """ 1434 errors = [] 1435 for expression_type in ensure_list(expression_types): 1436 parser = self.EXPRESSION_PARSERS.get(expression_type) 1437 if not parser: 1438 raise TypeError(f"No parser registered for {expression_type}") 1439 1440 try: 1441 return self._parse(parser, raw_tokens, sql) 1442 except ParseError as e: 1443 e.errors[0]["into_expression"] = expression_type 1444 errors.append(e) 1445 1446 raise ParseError( 1447 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1448 errors=merge_errors(errors), 1449 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1489 def check_errors(self) -> None: 1490 """Logs or raises any found errors, depending on the chosen error level setting.""" 1491 if self.error_level == ErrorLevel.WARN: 1492 for error in self.errors: 1493 logger.error(str(error)) 1494 elif self.error_level == ErrorLevel.RAISE and self.errors: 1495 raise ParseError( 1496 concat_messages(self.errors, self.max_errors), 1497 errors=merge_errors(self.errors), 1498 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: The error description.
            token: The token to anchor the error location on; defaults to the
                current (or, failing that, previous) token, or an empty one.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        # Slice a window of the original SQL around the offending token so the
        # message shows context, with the token itself underlined
        # (\033[4m ... \033[0m are the ANSI underline on/off escapes).
        # NOTE(review): interior spacing of the context line may have been
        # collapsed by formatting — verify against upstream before relying on it.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        # IMMEDIATE raises on the spot; other levels accumulate for check_errors().
        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1528 def expression( 1529 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1530 ) -> E: 1531 """ 1532 Creates a new, validated Expression. 1533 1534 Args: 1535 exp_class: The expression class to instantiate. 1536 comments: An optional list of comments to attach to the expression. 1537 kwargs: The arguments to set for the expression along with their respective values. 1538 1539 Returns: 1540 The target expression. 1541 """ 1542 instance = exp_class(**kwargs) 1543 instance.add_comments(comments) if comments else self._add_comments(instance) 1544 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1551 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1552 """ 1553 Validates an Expression, making sure that all its mandatory arguments are set. 1554 1555 Args: 1556 expression: The expression to validate. 1557 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1558 1559 Returns: 1560 The validated expression. 1561 """ 1562 if self.error_level != ErrorLevel.IGNORE: 1563 for error_message in expression.error_messages(args): 1564 self.raise_error(error_message) 1565 1566 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.