sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder

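# Illustrative sketch, not part of the module: the builders above normalize raw
# SQL function argument lists into typed AST nodes. A quick sanity check,
# assuming sqlglot is importable:
#
#     >>> from sqlglot import exp
#     >>> from sqlglot.parser import build_var_map
#     >>> node = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#     >>> type(node).__name__
#     'VarMap'
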
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

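    # A hedged construction sketch tying the arguments above together
    # (illustrative, not part of the class); `dialect` accepts a name or a
    # Dialect instance:
    #
    #     >>> from sqlglot.parser import Parser
    #     >>> from sqlglot.errors import ErrorLevel
    #     >>> parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")
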
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

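    # Dialect parsers customize this registry by subclassing and merging; a
    # sketch of the pattern (the subclass and function names are illustrative):
    #
    #     class MyDialectParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "MY_FUNC": lambda args: exp.Anonymous(this="MY_FUNC", expressions=args),
    #         }
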
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

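    # Because the keyword tokens above can double as identifiers, names like
    # `filter` parse unquoted (illustrative):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT 1 AS filter").sql()
    #     'SELECT 1 AS filter'
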
    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

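    # `parse_into` dispatches through this table; a hedged sketch, assuming the
    # default dialect:
    #
    #     >>> from sqlglot import exp
    #     >>> from sqlglot.tokens import Tokenizer
    #     >>> tokens = Tokenizer().tokenize("WHERE x > 1")
    #     >>> type(Parser().parse_into(exp.Where, tokens)[0]).__name__
    #     'Where'
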
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

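    # Placeholder tokens become exp.Placeholder nodes; an illustrative check
    # (the `:name` form relies on the COLON parser above):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT :id").find(exp.Placeholder).name
    #     'id'
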
    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

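    # Property parsing sketch (illustrative): a table COMMENT routes through the
    # "COMMENT" entry above to exp.SchemaCommentProperty.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ct = sqlglot.parse_one("CREATE TABLE t (x INT) COMMENT 'demo'", read="mysql")
    #     >>> ct.find(exp.SchemaCommentProperty).text("this")
    #     'demo'
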
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

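    # Column constraint sketch (illustrative): NOT NULL is handled by the "NOT"
    # entry above, yielding exp.NotNullColumnConstraint.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ct = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL)")
    #     >>> ct.find(exp.NotNullColumnConstraint) is not None
    #     True
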
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

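    # Special-form functions get dedicated parsers instead of the generic
    # argument-list path (illustrative):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT CAST(x AS INT)").find(exp.Cast).to.sql()
    #     'INT'
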
("group", self._parse_group()), 1185 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1186 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1187 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1188 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1189 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1190 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1191 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1192 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1193 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1194 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1195 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1196 TokenType.CLUSTER_BY: lambda self: ( 1197 "cluster", 1198 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1199 ), 1200 TokenType.DISTRIBUTE_BY: lambda self: ( 1201 "distribute", 1202 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1203 ), 1204 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1205 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1206 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1207 } 1208 1209 SET_PARSERS = { 1210 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1211 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1212 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1213 "TRANSACTION": lambda self: self._parse_set_transaction(), 1214 } 1215 1216 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1217 1218 TYPE_LITERAL_PARSERS = { 1219 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1220 } 1221 1222 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1223 1224 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1225 1226 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1227 1228 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1229 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1230 "ISOLATION": ( 1231 ("LEVEL", "REPEATABLE", "READ"), 1232 ("LEVEL", "READ", "COMMITTED"), 1233 ("LEVEL", "READ", "UNCOMITTED"), 1234 ("LEVEL", "SERIALIZABLE"), 1235 ), 1236 "READ": ("WRITE", "ONLY"), 1237 } 1238 1239 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1240 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1241 ) 1242 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1243 1244 CREATE_SEQUENCE: OPTIONS_TYPE = { 1245 "SCALE": ("EXTEND", "NOEXTEND"), 1246 "SHARD": ("EXTEND", "NOEXTEND"), 1247 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1248 **dict.fromkeys( 1249 ( 1250 "SESSION", 1251 "GLOBAL", 1252 "KEEP", 1253 "NOKEEP", 1254 "ORDER", 1255 "NOORDER", 1256 "NOCACHE", 1257 "CYCLE", 1258 "NOCYCLE", 1259 "NOMINVALUE", 1260 "NOMAXVALUE", 1261 "NOSCALE", 1262 "NOSHARD", 1263 ), 1264 tuple(), 1265 ), 1266 } 1267 1268 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1269 1270 USABLES: OPTIONS_TYPE = dict.fromkeys( 1271 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1272 ) 1273 1274 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1275 1276 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1277 "TYPE": ("EVOLUTION",), 1278 **dict.fromkeys(("BINDING", "COMPENSATION", 
"EVOLUTION"), tuple()), 1279 } 1280 1281 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1282 1283 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1284 1285 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1286 "NOT": ("ENFORCED",), 1287 "MATCH": ( 1288 "FULL", 1289 "PARTIAL", 1290 "SIMPLE", 1291 ), 1292 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1293 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1294 } 1295 1296 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1297 1298 CLONE_KEYWORDS = {"CLONE", "COPY"} 1299 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1300 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1301 1302 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1303 1304 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1305 1306 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1307 1308 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1309 1310 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1311 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1312 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1313 1314 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1315 1316 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1317 1318 ADD_CONSTRAINT_TOKENS = { 1319 TokenType.CONSTRAINT, 1320 TokenType.FOREIGN_KEY, 1321 TokenType.INDEX, 1322 TokenType.KEY, 1323 TokenType.PRIMARY_KEY, 1324 TokenType.UNIQUE, 1325 } 1326 1327 DISTINCT_TOKENS = {TokenType.DISTINCT} 1328 1329 NULL_TOKENS = {TokenType.NULL} 1330 1331 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1332 1333 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1334 1335 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1336 1337 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1338 1339 ODBC_DATETIME_LITERALS = { 1340 "d": exp.Date, 1341 "t": exp.Time, 1342 "ts": exp.Timestamp, 1343 } 1344 1345 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1346 1347 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1348 1349 # The style options for the DESCRIBE statement 1350 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1351 1352 # The style options for the ANALYZE statement 1353 ANALYZE_STYLES = { 1354 "BUFFER_USAGE_LIMIT", 1355 "FULL", 1356 "LOCAL", 1357 "NO_WRITE_TO_BINLOG", 1358 "SAMPLE", 1359 "SKIP_LOCKED", 1360 "VERBOSE", 1361 } 1362 1363 ANALYZE_EXPRESSION_PARSERS = { 1364 "ALL": lambda self: self._parse_analyze_columns(), 1365 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1366 "DELETE": lambda self: self._parse_analyze_delete(), 1367 "DROP": lambda self: self._parse_analyze_histogram(), 1368 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1369 "LIST": lambda self: self._parse_analyze_list(), 1370 "PREDICATE": lambda self: self._parse_analyze_columns(), 1371 "UPDATE": lambda self: self._parse_analyze_histogram(), 1372 "VALIDATE": lambda self: self._parse_analyze_validate(), 1373 } 1374 1375 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1376 1377 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1378 1379 OPERATION_MODIFIERS: t.Set[str] = set() 1380 1381 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1382 1383 STRICT_CAST = True 1384 1385 PREFIXED_PIVOT_COLUMNS = False 1386 IDENTIFY_PIVOT_STRINGS = False 1387 1388 LOG_DEFAULTS_TO_LN = False 1389 
    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

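    # A usage sketch for `parse` (illustrative): one tree per statement.
    #
    #     >>> from sqlglot.tokens import Tokenizer
    #     >>> tokens = Tokenizer().tokenize("SELECT 1; SELECT 2")
    #     >>> len(Parser().parse(tokens))
    #     2
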
1497 """ 1498 return self._parse( 1499 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1500 ) 1501 1502 def parse_into( 1503 self, 1504 expression_types: exp.IntoType, 1505 raw_tokens: t.List[Token], 1506 sql: t.Optional[str] = None, 1507 ) -> t.List[t.Optional[exp.Expression]]: 1508 """ 1509 Parses a list of tokens into a given Expression type. If a collection of Expression 1510 types is given instead, this method will try to parse the token list into each one 1511 of them, stopping at the first for which the parsing succeeds. 1512 1513 Args: 1514 expression_types: The expression type(s) to try and parse the token list into. 1515 raw_tokens: The list of tokens. 1516 sql: The original SQL string, used to produce helpful debug messages. 1517 1518 Returns: 1519 The target Expression. 1520 """ 1521 errors = [] 1522 for expression_type in ensure_list(expression_types): 1523 parser = self.EXPRESSION_PARSERS.get(expression_type) 1524 if not parser: 1525 raise TypeError(f"No parser registered for {expression_type}") 1526 1527 try: 1528 return self._parse(parser, raw_tokens, sql) 1529 except ParseError as e: 1530 e.errors[0]["into_expression"] = expression_type 1531 errors.append(e) 1532 1533 raise ParseError( 1534 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1535 errors=merge_errors(errors), 1536 ) from errors[-1] 1537 1538 def _parse( 1539 self, 1540 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1541 raw_tokens: t.List[Token], 1542 sql: t.Optional[str] = None, 1543 ) -> t.List[t.Optional[exp.Expression]]: 1544 self.reset() 1545 self.sql = sql or "" 1546 1547 total = len(raw_tokens) 1548 chunks: t.List[t.List[Token]] = [[]] 1549 1550 for i, token in enumerate(raw_tokens): 1551 if token.token_type == TokenType.SEMICOLON: 1552 if token.comments: 1553 chunks.append([token]) 1554 1555 if i < total - 1: 1556 chunks.append([]) 1557 else: 1558 chunks[-1].append(token) 1559 1560 expressions = [] 1561 1562 for tokens in chunks: 1563 self._index = -1 1564 self._tokens = tokens 1565 self._advance() 1566 1567 expressions.append(parse_method(self)) 1568 1569 if self._index < len(self._tokens): 1570 self.raise_error("Invalid expression / Unexpected token") 1571 1572 self.check_errors() 1573 1574 return expressions 1575 1576 def check_errors(self) -> None: 1577 """Logs or raises any found errors, depending on the chosen error level setting.""" 1578 if self.error_level == ErrorLevel.WARN: 1579 for error in self.errors: 1580 logger.error(str(error)) 1581 elif self.error_level == ErrorLevel.RAISE and self.errors: 1582 raise ParseError( 1583 concat_messages(self.errors, self.max_errors), 1584 errors=merge_errors(self.errors), 1585 ) 1586 1587 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1588 """ 1589 Appends an error in the list of recorded errors or raises it, depending on the chosen 1590 error level setting. 1591 """ 1592 token = token or self._curr or self._prev or Token.string("") 1593 start = token.start 1594 end = token.end + 1 1595 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1596 highlight = self.sql[start:end] 1597 end_context = self.sql[end : end + self.error_message_context] 1598 1599 error = ParseError.new( 1600 f"{message}. 
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

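    # The `expression` helper above builds and validates a node in one step
    # (illustrative):
    #
    #     >>> from sqlglot import exp
    #     >>> Parser().expression(exp.Not, this=exp.column("x")).sql()
    #     'NOT x'
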
1701         This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
1702         solve this by setting & resetting the parser state accordingly.
1703         """
1704         index = self._index
1705         error_level = self.error_level
1706
1707         self.error_level = ErrorLevel.IMMEDIATE
1708         try:
1709             this = parse_method()
1710         except ParseError:
1711             this = None
1712         finally:
1713             if not this or retreat:
1714                 self._retreat(index)
1715             self.error_level = error_level
1716
1717         return this
1718
1719     def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1720         start = self._prev
1721         exists = self._parse_exists() if allow_exists else None
1722
1723         self._match(TokenType.ON)
1724
1725         materialized = self._match_text_seq("MATERIALIZED")
1726         kind = self._match_set(self.CREATABLES) and self._prev
1727         if not kind:
1728             return self._parse_as_command(start)
1729
1730         if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1731             this = self._parse_user_defined_function(kind=kind.token_type)
1732         elif kind.token_type == TokenType.TABLE:
1733             this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1734         elif kind.token_type == TokenType.COLUMN:
1735             this = self._parse_column()
1736         else:
1737             this = self._parse_id_var()
1738
1739         self._match(TokenType.IS)
1740
1741         return self.expression(
1742             exp.Comment,
1743             this=this,
1744             kind=kind.text,
1745             expression=self._parse_string(),
1746             exists=exists,
1747             materialized=materialized,
1748         )
1749
1750     def _parse_to_table(
1751         self,
1752     ) -> exp.ToTableProperty:
1753         table = self._parse_table_parts(schema=True)
1754         return self.expression(exp.ToTableProperty, this=table)
1755
1756     # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1757     def _parse_ttl(self) -> exp.Expression:
1758         def _parse_ttl_action() -> t.Optional[exp.Expression]:
1759             this = self._parse_bitwise()
1760
1761             if self._match_text_seq("DELETE"):
1762                 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1763             if self._match_text_seq("RECOMPRESS"):
1764                 return self.expression(
1765                     exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1766                 )
1767             if self._match_text_seq("TO", "DISK"):
1768                 return self.expression(
1769                     exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1770                 )
1771             if self._match_text_seq("TO", "VOLUME"):
1772                 return self.expression(
1773                     exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1774                 )
1775
1776             return this
1777
1778         expressions = self._parse_csv(_parse_ttl_action)
1779         where = self._parse_where()
1780         group = self._parse_group()
1781
1782         aggregates = None
1783         if group and self._match(TokenType.SET):
1784             aggregates = self._parse_csv(self._parse_set_item)
1785
1786         return self.expression(
1787             exp.MergeTreeTTL,
1788             expressions=expressions,
1789             where=where,
1790             group=group,
1791             aggregates=aggregates,
1792         )
1793
1794     def _parse_statement(self) -> t.Optional[exp.Expression]:
1795         if self._curr is None:
1796             return None
1797
1798         if self._match_set(self.STATEMENT_PARSERS):
1799             comments = self._prev_comments
1800             stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
1801             stmt.add_comments(comments, prepend=True)
1802             return stmt
1803
1804         if self._match_set(self.dialect.tokenizer.COMMANDS):
1805             return self._parse_command()
1806
1807         expression = self._parse_expression()
1808         expression = self._parse_set_operations(expression) if expression else self._parse_select()
1809         return self._parse_query_modifiers(expression)
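    # A usage sketch tying together the entry points above (parse -> _parse ->
    # _parse_statement, and parse_into). The calls shown are sqlglot's documented
    # top-level helpers, which delegate to this class; treat the snippet as
    # illustrative rather than as part of the module:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     # Tokenizes, splits the token stream on semicolons, and runs
    #     # _parse_statement once per chunk:
    #     statements = sqlglot.parse("SELECT 1; SELECT 2")  # -> [Select, Select]
    #
    #     # Routes through parse_into(), which tries each candidate expression
    #     # type and merges the ParseErrors if none succeeds:
    #     select = sqlglot.parse_one("SELECT a FROM t", into=exp.Select)
    #
    #     # error_level drives check_errors()/raise_error() above; for instance,
    #     # ErrorLevel.WARN logs errors instead of raising.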
1810 1811 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1812 start = self._prev 1813 temporary = self._match(TokenType.TEMPORARY) 1814 materialized = self._match_text_seq("MATERIALIZED") 1815 1816 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1817 if not kind: 1818 return self._parse_as_command(start) 1819 1820 concurrently = self._match_text_seq("CONCURRENTLY") 1821 if_exists = exists or self._parse_exists() 1822 1823 if kind == "COLUMN": 1824 this = self._parse_column() 1825 else: 1826 this = self._parse_table_parts( 1827 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1828 ) 1829 1830 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1831 1832 if self._match(TokenType.L_PAREN, advance=False): 1833 expressions = self._parse_wrapped_csv(self._parse_types) 1834 else: 1835 expressions = None 1836 1837 return self.expression( 1838 exp.Drop, 1839 exists=if_exists, 1840 this=this, 1841 expressions=expressions, 1842 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1843 temporary=temporary, 1844 materialized=materialized, 1845 cascade=self._match_text_seq("CASCADE"), 1846 constraints=self._match_text_seq("CONSTRAINTS"), 1847 purge=self._match_text_seq("PURGE"), 1848 cluster=cluster, 1849 concurrently=concurrently, 1850 ) 1851 1852 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1853 return ( 1854 self._match_text_seq("IF") 1855 and (not not_ or self._match(TokenType.NOT)) 1856 and self._match(TokenType.EXISTS) 1857 ) 1858 1859 def _parse_create(self) -> exp.Create | exp.Command: 1860 # Note: this can't be None because we've matched a statement parser 1861 start = self._prev 1862 1863 replace = ( 1864 start.token_type == TokenType.REPLACE 1865 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1866 or self._match_pair(TokenType.OR, TokenType.ALTER) 1867 ) 1868 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1869 1870 unique = self._match(TokenType.UNIQUE) 1871 1872 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1873 clustered = True 1874 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1875 "COLUMNSTORE" 1876 ): 1877 clustered = False 1878 else: 1879 clustered = None 1880 1881 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1882 self._advance() 1883 1884 properties = None 1885 create_token = self._match_set(self.CREATABLES) and self._prev 1886 1887 if not create_token: 1888 # exp.Properties.Location.POST_CREATE 1889 properties = self._parse_properties() 1890 create_token = self._match_set(self.CREATABLES) and self._prev 1891 1892 if not properties or not create_token: 1893 return self._parse_as_command(start) 1894 1895 concurrently = self._match_text_seq("CONCURRENTLY") 1896 exists = self._parse_exists(not_=True) 1897 this = None 1898 expression: t.Optional[exp.Expression] = None 1899 indexes = None 1900 no_schema_binding = None 1901 begin = None 1902 end = None 1903 clone = None 1904 1905 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1906 nonlocal properties 1907 if properties and temp_props: 1908 properties.expressions.extend(temp_props.expressions) 1909 elif temp_props: 1910 properties = temp_props 1911 1912 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1913 this = self._parse_user_defined_function(kind=create_token.token_type) 1914 1915 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1916 
extend_props(self._parse_properties()) 1917 1918 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1919 extend_props(self._parse_properties()) 1920 1921 if not expression: 1922 if self._match(TokenType.COMMAND): 1923 expression = self._parse_as_command(self._prev) 1924 else: 1925 begin = self._match(TokenType.BEGIN) 1926 return_ = self._match_text_seq("RETURN") 1927 1928 if self._match(TokenType.STRING, advance=False): 1929 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1930 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1931 expression = self._parse_string() 1932 extend_props(self._parse_properties()) 1933 else: 1934 expression = self._parse_user_defined_function_expression() 1935 1936 end = self._match_text_seq("END") 1937 1938 if return_: 1939 expression = self.expression(exp.Return, this=expression) 1940 elif create_token.token_type == TokenType.INDEX: 1941 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1942 if not self._match(TokenType.ON): 1943 index = self._parse_id_var() 1944 anonymous = False 1945 else: 1946 index = None 1947 anonymous = True 1948 1949 this = self._parse_index(index=index, anonymous=anonymous) 1950 elif create_token.token_type in self.DB_CREATABLES: 1951 table_parts = self._parse_table_parts( 1952 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1953 ) 1954 1955 # exp.Properties.Location.POST_NAME 1956 self._match(TokenType.COMMA) 1957 extend_props(self._parse_properties(before=True)) 1958 1959 this = self._parse_schema(this=table_parts) 1960 1961 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1962 extend_props(self._parse_properties()) 1963 1964 self._match(TokenType.ALIAS) 1965 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1966 # exp.Properties.Location.POST_ALIAS 1967 extend_props(self._parse_properties()) 1968 1969 if create_token.token_type == TokenType.SEQUENCE: 1970 expression = self._parse_types() 1971 extend_props(self._parse_properties()) 1972 else: 1973 expression = self._parse_ddl_select() 1974 1975 if create_token.token_type == TokenType.TABLE: 1976 # exp.Properties.Location.POST_EXPRESSION 1977 extend_props(self._parse_properties()) 1978 1979 indexes = [] 1980 while True: 1981 index = self._parse_index() 1982 1983 # exp.Properties.Location.POST_INDEX 1984 extend_props(self._parse_properties()) 1985 if not index: 1986 break 1987 else: 1988 self._match(TokenType.COMMA) 1989 indexes.append(index) 1990 elif create_token.token_type == TokenType.VIEW: 1991 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1992 no_schema_binding = True 1993 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1994 extend_props(self._parse_properties()) 1995 1996 shallow = self._match_text_seq("SHALLOW") 1997 1998 if self._match_texts(self.CLONE_KEYWORDS): 1999 copy = self._prev.text.lower() == "copy" 2000 clone = self.expression( 2001 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2002 ) 2003 2004 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2005 return self._parse_as_command(start) 2006 2007 create_kind_text = create_token.text.upper() 2008 return self.expression( 2009 exp.Create, 2010 this=this, 2011 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2012 replace=replace, 2013 refresh=refresh, 2014 unique=unique, 2015 expression=expression, 
2016 exists=exists, 2017 properties=properties, 2018 indexes=indexes, 2019 no_schema_binding=no_schema_binding, 2020 begin=begin, 2021 end=end, 2022 clone=clone, 2023 concurrently=concurrently, 2024 clustered=clustered, 2025 ) 2026 2027 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2028 seq = exp.SequenceProperties() 2029 2030 options = [] 2031 index = self._index 2032 2033 while self._curr: 2034 self._match(TokenType.COMMA) 2035 if self._match_text_seq("INCREMENT"): 2036 self._match_text_seq("BY") 2037 self._match_text_seq("=") 2038 seq.set("increment", self._parse_term()) 2039 elif self._match_text_seq("MINVALUE"): 2040 seq.set("minvalue", self._parse_term()) 2041 elif self._match_text_seq("MAXVALUE"): 2042 seq.set("maxvalue", self._parse_term()) 2043 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2044 self._match_text_seq("=") 2045 seq.set("start", self._parse_term()) 2046 elif self._match_text_seq("CACHE"): 2047 # T-SQL allows empty CACHE which is initialized dynamically 2048 seq.set("cache", self._parse_number() or True) 2049 elif self._match_text_seq("OWNED", "BY"): 2050 # "OWNED BY NONE" is the default 2051 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2052 else: 2053 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2054 if opt: 2055 options.append(opt) 2056 else: 2057 break 2058 2059 seq.set("options", options if options else None) 2060 return None if self._index == index else seq 2061 2062 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2063 # only used for teradata currently 2064 self._match(TokenType.COMMA) 2065 2066 kwargs = { 2067 "no": self._match_text_seq("NO"), 2068 "dual": self._match_text_seq("DUAL"), 2069 "before": self._match_text_seq("BEFORE"), 2070 "default": self._match_text_seq("DEFAULT"), 2071 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2072 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2073 "after": self._match_text_seq("AFTER"), 2074 "minimum": self._match_texts(("MIN", "MINIMUM")), 2075 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2076 } 2077 2078 if self._match_texts(self.PROPERTY_PARSERS): 2079 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2080 try: 2081 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2082 except TypeError: 2083 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2084 2085 return None 2086 2087 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2088 return self._parse_wrapped_csv(self._parse_property) 2089 2090 def _parse_property(self) -> t.Optional[exp.Expression]: 2091 if self._match_texts(self.PROPERTY_PARSERS): 2092 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2093 2094 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2095 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2096 2097 if self._match_text_seq("COMPOUND", "SORTKEY"): 2098 return self._parse_sortkey(compound=True) 2099 2100 if self._match_text_seq("SQL", "SECURITY"): 2101 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2102 2103 index = self._index 2104 key = self._parse_column() 2105 2106 if not self._match(TokenType.EQ): 2107 self._retreat(index) 2108 return self._parse_sequence_properties() 2109 2110 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2111 if isinstance(key, exp.Column): 2112 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2113 2114 value = self._parse_bitwise() or self._parse_var(any_token=True) 2115 2116 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2117 if isinstance(value, exp.Column): 2118 value = exp.var(value.name) 2119 2120 return self.expression(exp.Property, this=key, value=value) 2121 2122 def _parse_stored(self) -> exp.FileFormatProperty: 2123 self._match(TokenType.ALIAS) 2124 2125 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2126 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2127 2128 return self.expression( 2129 exp.FileFormatProperty, 2130 this=( 2131 self.expression( 2132 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2133 ) 2134 if input_format or output_format 2135 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2136 ), 2137 ) 2138 2139 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2140 field = self._parse_field() 2141 if isinstance(field, exp.Identifier) and not field.quoted: 2142 field = exp.var(field) 2143 2144 return field 2145 2146 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2147 self._match(TokenType.EQ) 2148 self._match(TokenType.ALIAS) 2149 2150 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2151 2152 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2153 properties = [] 2154 while True: 2155 if before: 2156 prop = self._parse_property_before() 2157 else: 2158 prop = self._parse_property() 2159 if not prop: 2160 break 2161 for p in ensure_list(prop): 2162 properties.append(p) 2163 2164 if properties: 2165 return self.expression(exp.Properties, expressions=properties) 2166 2167 return None 2168 2169 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2170 return self.expression( 2171 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2172 ) 2173 2174 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2175 if self._match_texts(("DEFINER", "INVOKER")): 2176 security_specifier = self._prev.text.upper() 2177 return self.expression(exp.SecurityProperty, this=security_specifier) 2178 return None 2179 2180 def _parse_settings_property(self) -> exp.SettingsProperty: 2181 return self.expression( 2182 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2183 ) 2184 2185 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2186 if self._index >= 2: 2187 pre_volatile_token = self._tokens[self._index - 2] 2188 else: 2189 pre_volatile_token = None 2190 2191 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2192 return exp.VolatileProperty() 2193 2194 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2195 2196 def _parse_retention_period(self) -> exp.Var: 2197 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2198 number = self._parse_number() 2199 number_str = f"{number} " if number else "" 2200 unit = self._parse_var(any_token=True) 2201 return exp.var(f"{number_str}{unit}") 2202 2203 def _parse_system_versioning_property( 2204 self, with_: bool = False 2205 ) -> exp.WithSystemVersioningProperty: 2206 self._match(TokenType.EQ) 2207 prop = self.expression( 2208 exp.WithSystemVersioningProperty, 2209 **{ # type: ignore 2210 "on": 
True, 2211 "with": with_, 2212 }, 2213 ) 2214 2215 if self._match_text_seq("OFF"): 2216 prop.set("on", False) 2217 return prop 2218 2219 self._match(TokenType.ON) 2220 if self._match(TokenType.L_PAREN): 2221 while self._curr and not self._match(TokenType.R_PAREN): 2222 if self._match_text_seq("HISTORY_TABLE", "="): 2223 prop.set("this", self._parse_table_parts()) 2224 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2225 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2226 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2227 prop.set("retention_period", self._parse_retention_period()) 2228 2229 self._match(TokenType.COMMA) 2230 2231 return prop 2232 2233 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2234 self._match(TokenType.EQ) 2235 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2236 prop = self.expression(exp.DataDeletionProperty, on=on) 2237 2238 if self._match(TokenType.L_PAREN): 2239 while self._curr and not self._match(TokenType.R_PAREN): 2240 if self._match_text_seq("FILTER_COLUMN", "="): 2241 prop.set("filter_column", self._parse_column()) 2242 elif self._match_text_seq("RETENTION_PERIOD", "="): 2243 prop.set("retention_period", self._parse_retention_period()) 2244 2245 self._match(TokenType.COMMA) 2246 2247 return prop 2248 2249 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2250 kind = "HASH" 2251 expressions: t.Optional[t.List[exp.Expression]] = None 2252 if self._match_text_seq("BY", "HASH"): 2253 expressions = self._parse_wrapped_csv(self._parse_id_var) 2254 elif self._match_text_seq("BY", "RANDOM"): 2255 kind = "RANDOM" 2256 2257 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2258 buckets: t.Optional[exp.Expression] = None 2259 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2260 buckets = self._parse_number() 2261 2262 return self.expression( 2263 exp.DistributedByProperty, 2264 expressions=expressions, 2265 kind=kind, 2266 buckets=buckets, 2267 order=self._parse_order(), 2268 ) 2269 2270 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2271 self._match_text_seq("KEY") 2272 expressions = self._parse_wrapped_id_vars() 2273 return self.expression(expr_type, expressions=expressions) 2274 2275 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2276 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2277 prop = self._parse_system_versioning_property(with_=True) 2278 self._match_r_paren() 2279 return prop 2280 2281 if self._match(TokenType.L_PAREN, advance=False): 2282 return self._parse_wrapped_properties() 2283 2284 if self._match_text_seq("JOURNAL"): 2285 return self._parse_withjournaltable() 2286 2287 if self._match_texts(self.VIEW_ATTRIBUTES): 2288 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2289 2290 if self._match_text_seq("DATA"): 2291 return self._parse_withdata(no=False) 2292 elif self._match_text_seq("NO", "DATA"): 2293 return self._parse_withdata(no=True) 2294 2295 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2296 return self._parse_serde_properties(with_=True) 2297 2298 if self._match(TokenType.SCHEMA): 2299 return self.expression( 2300 exp.WithSchemaBindingProperty, 2301 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2302 ) 2303 2304 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2305 return self.expression( 2306 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2307 ) 2308 2309 if not self._next: 2310 return None 2311 2312 return self._parse_withisolatedloading() 2313 2314 def _parse_procedure_option(self) -> exp.Expression | None: 2315 if self._match_text_seq("EXECUTE", "AS"): 2316 return self.expression( 2317 exp.ExecuteAsProperty, 2318 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2319 or self._parse_string(), 2320 ) 2321 2322 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2323 2324 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2325 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2326 self._match(TokenType.EQ) 2327 2328 user = self._parse_id_var() 2329 self._match(TokenType.PARAMETER) 2330 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2331 2332 if not user or not host: 2333 return None 2334 2335 return exp.DefinerProperty(this=f"{user}@{host}") 2336 2337 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2338 self._match(TokenType.TABLE) 2339 self._match(TokenType.EQ) 2340 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2341 2342 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2343 return self.expression(exp.LogProperty, no=no) 2344 2345 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2346 return self.expression(exp.JournalProperty, **kwargs) 2347 2348 def _parse_checksum(self) -> exp.ChecksumProperty: 2349 self._match(TokenType.EQ) 2350 2351 on = None 2352 if self._match(TokenType.ON): 2353 on = True 2354 elif self._match_text_seq("OFF"): 2355 on = False 2356 2357 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2358 2359 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2360 return self.expression( 2361 exp.Cluster, 2362 expressions=( 2363 self._parse_wrapped_csv(self._parse_ordered) 2364 if wrapped 2365 else self._parse_csv(self._parse_ordered) 2366 ), 2367 ) 2368 2369 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2370 self._match_text_seq("BY") 2371 2372 self._match_l_paren() 2373 expressions = self._parse_csv(self._parse_column) 2374 self._match_r_paren() 2375 2376 if self._match_text_seq("SORTED", "BY"): 2377 self._match_l_paren() 2378 sorted_by = self._parse_csv(self._parse_ordered) 2379 self._match_r_paren() 2380 else: 2381 sorted_by = None 2382 2383 self._match(TokenType.INTO) 2384 buckets = self._parse_number() 2385 self._match_text_seq("BUCKETS") 2386 2387 return self.expression( 2388 exp.ClusteredByProperty, 2389 expressions=expressions, 2390 sorted_by=sorted_by, 2391 buckets=buckets, 2392 ) 2393 2394 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2395 if not self._match_text_seq("GRANTS"): 2396 self._retreat(self._index - 1) 2397 return None 2398 2399 return self.expression(exp.CopyGrantsProperty) 2400 2401 def _parse_freespace(self) -> exp.FreespaceProperty: 2402 self._match(TokenType.EQ) 2403 return self.expression( 2404 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2405 ) 2406 2407 def _parse_mergeblockratio( 2408 self, no: bool = False, default: bool = False 2409 ) -> exp.MergeBlockRatioProperty: 2410 if self._match(TokenType.EQ): 2411 return self.expression( 2412 exp.MergeBlockRatioProperty, 2413 this=self._parse_number(), 2414 percent=self._match(TokenType.PERCENT), 2415 ) 2416 2417 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2418 2419 def _parse_datablocksize( 2420 self, 2421 default: t.Optional[bool] = None, 2422 minimum: t.Optional[bool] = None, 2423 maximum: t.Optional[bool] = None, 2424 ) -> exp.DataBlocksizeProperty: 2425 self._match(TokenType.EQ) 2426 size = self._parse_number() 2427 2428 units = None 2429 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2430 units = self._prev.text 2431 2432 return self.expression( 2433 exp.DataBlocksizeProperty, 2434 size=size, 2435 units=units, 2436 default=default, 2437 minimum=minimum, 2438 maximum=maximum, 2439 ) 2440 2441 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2442 self._match(TokenType.EQ) 2443 always = self._match_text_seq("ALWAYS") 2444 manual = self._match_text_seq("MANUAL") 2445 never = self._match_text_seq("NEVER") 2446 default = self._match_text_seq("DEFAULT") 2447 2448 autotemp = None 2449 if self._match_text_seq("AUTOTEMP"): 2450 autotemp = self._parse_schema() 2451 2452 return self.expression( 2453 exp.BlockCompressionProperty, 2454 always=always, 2455 manual=manual, 2456 never=never, 2457 default=default, 2458 autotemp=autotemp, 2459 ) 2460 2461 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2462 index = self._index 2463 no = self._match_text_seq("NO") 2464 concurrent = self._match_text_seq("CONCURRENT") 2465 2466 if not self._match_text_seq("ISOLATED", "LOADING"): 2467 self._retreat(index) 2468 return None 2469 2470 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2471 return self.expression( 2472 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2473 ) 2474 2475 def _parse_locking(self) -> exp.LockingProperty: 2476 if self._match(TokenType.TABLE): 2477 kind = "TABLE" 2478 elif self._match(TokenType.VIEW): 2479 kind = "VIEW" 2480 elif self._match(TokenType.ROW): 2481 kind = "ROW" 2482 elif self._match_text_seq("DATABASE"): 2483 kind = "DATABASE" 2484 else: 2485 kind = None 2486 2487 if kind in ("DATABASE", "TABLE", "VIEW"): 2488 this = self._parse_table_parts() 2489 else: 2490 this = None 2491 2492 if self._match(TokenType.FOR): 2493 for_or_in = "FOR" 2494 elif self._match(TokenType.IN): 2495 for_or_in = "IN" 2496 else: 2497 for_or_in = None 2498 2499 if self._match_text_seq("ACCESS"): 2500 lock_type = "ACCESS" 2501 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2502 lock_type = "EXCLUSIVE" 2503 elif self._match_text_seq("SHARE"): 2504 lock_type = "SHARE" 2505 elif self._match_text_seq("READ"): 2506 lock_type = "READ" 2507 elif self._match_text_seq("WRITE"): 2508 lock_type = "WRITE" 2509 elif self._match_text_seq("CHECKSUM"): 2510 lock_type = "CHECKSUM" 2511 else: 2512 lock_type = None 2513 2514 override = self._match_text_seq("OVERRIDE") 2515 2516 return self.expression( 2517 exp.LockingProperty, 2518 this=this, 2519 kind=kind, 2520 for_or_in=for_or_in, 2521 lock_type=lock_type, 2522 override=override, 2523 ) 2524 2525 def _parse_partition_by(self) -> t.List[exp.Expression]: 2526 if self._match(TokenType.PARTITION_BY): 2527 return self._parse_csv(self._parse_assignment) 2528 return [] 2529 2530 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2531 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2532 if self._match_text_seq("MINVALUE"): 2533 return exp.var("MINVALUE") 2534 if self._match_text_seq("MAXVALUE"): 2535 return exp.var("MAXVALUE") 2536 return self._parse_bitwise() 2537 2538 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2539 expression = None 
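        # For reference, the branches below follow PostgreSQL's declarative
        # partition bound grammar (illustrative SQL, paraphrased from the
        # PostgreSQL docs linked below rather than taken from this module):
        #
        #     CREATE TABLE t1 PARTITION OF t FOR VALUES IN (1, 2);
        #     CREATE TABLE t2 PARTITION OF t FOR VALUES FROM (MINVALUE) TO (10);
        #     CREATE TABLE t3 PARTITION OF t FOR VALUES WITH (MODULUS 4, REMAINDER 0);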
2540 from_expressions = None 2541 to_expressions = None 2542 2543 if self._match(TokenType.IN): 2544 this = self._parse_wrapped_csv(self._parse_bitwise) 2545 elif self._match(TokenType.FROM): 2546 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2547 self._match_text_seq("TO") 2548 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2549 elif self._match_text_seq("WITH", "(", "MODULUS"): 2550 this = self._parse_number() 2551 self._match_text_seq(",", "REMAINDER") 2552 expression = self._parse_number() 2553 self._match_r_paren() 2554 else: 2555 self.raise_error("Failed to parse partition bound spec.") 2556 2557 return self.expression( 2558 exp.PartitionBoundSpec, 2559 this=this, 2560 expression=expression, 2561 from_expressions=from_expressions, 2562 to_expressions=to_expressions, 2563 ) 2564 2565 # https://www.postgresql.org/docs/current/sql-createtable.html 2566 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2567 if not self._match_text_seq("OF"): 2568 self._retreat(self._index - 1) 2569 return None 2570 2571 this = self._parse_table(schema=True) 2572 2573 if self._match(TokenType.DEFAULT): 2574 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2575 elif self._match_text_seq("FOR", "VALUES"): 2576 expression = self._parse_partition_bound_spec() 2577 else: 2578 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2579 2580 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2581 2582 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2583 self._match(TokenType.EQ) 2584 return self.expression( 2585 exp.PartitionedByProperty, 2586 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2587 ) 2588 2589 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2590 if self._match_text_seq("AND", "STATISTICS"): 2591 statistics = True 2592 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2593 statistics = False 2594 else: 2595 statistics = None 2596 2597 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2598 2599 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2600 if self._match_text_seq("SQL"): 2601 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2602 return None 2603 2604 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2605 if self._match_text_seq("SQL", "DATA"): 2606 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2607 return None 2608 2609 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2610 if self._match_text_seq("PRIMARY", "INDEX"): 2611 return exp.NoPrimaryIndexProperty() 2612 if self._match_text_seq("SQL"): 2613 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2614 return None 2615 2616 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2617 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2618 return exp.OnCommitProperty() 2619 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2620 return exp.OnCommitProperty(delete=True) 2621 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2622 2623 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2624 if self._match_text_seq("SQL", "DATA"): 2625 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2626 return None 2627 2628 def _parse_distkey(self) -> exp.DistKeyProperty: 2629 return self.expression(exp.DistKeyProperty, 
this=self._parse_wrapped(self._parse_id_var)) 2630 2631 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2632 table = self._parse_table(schema=True) 2633 2634 options = [] 2635 while self._match_texts(("INCLUDING", "EXCLUDING")): 2636 this = self._prev.text.upper() 2637 2638 id_var = self._parse_id_var() 2639 if not id_var: 2640 return None 2641 2642 options.append( 2643 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2644 ) 2645 2646 return self.expression(exp.LikeProperty, this=table, expressions=options) 2647 2648 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2649 return self.expression( 2650 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2651 ) 2652 2653 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2654 self._match(TokenType.EQ) 2655 return self.expression( 2656 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2657 ) 2658 2659 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2660 self._match_text_seq("WITH", "CONNECTION") 2661 return self.expression( 2662 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2663 ) 2664 2665 def _parse_returns(self) -> exp.ReturnsProperty: 2666 value: t.Optional[exp.Expression] 2667 null = None 2668 is_table = self._match(TokenType.TABLE) 2669 2670 if is_table: 2671 if self._match(TokenType.LT): 2672 value = self.expression( 2673 exp.Schema, 2674 this="TABLE", 2675 expressions=self._parse_csv(self._parse_struct_types), 2676 ) 2677 if not self._match(TokenType.GT): 2678 self.raise_error("Expecting >") 2679 else: 2680 value = self._parse_schema(exp.var("TABLE")) 2681 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2682 null = True 2683 value = None 2684 else: 2685 value = self._parse_types() 2686 2687 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2688 2689 def _parse_describe(self) -> exp.Describe: 2690 kind = self._match_set(self.CREATABLES) and self._prev.text 2691 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2692 if self._match(TokenType.DOT): 2693 style = None 2694 self._retreat(self._index - 2) 2695 2696 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2697 2698 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2699 this = self._parse_statement() 2700 else: 2701 this = self._parse_table(schema=True) 2702 2703 properties = self._parse_properties() 2704 expressions = properties.expressions if properties else None 2705 partition = self._parse_partition() 2706 return self.expression( 2707 exp.Describe, 2708 this=this, 2709 style=style, 2710 kind=kind, 2711 expressions=expressions, 2712 partition=partition, 2713 format=format, 2714 ) 2715 2716 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2717 kind = self._prev.text.upper() 2718 expressions = [] 2719 2720 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2721 if self._match(TokenType.WHEN): 2722 expression = self._parse_disjunction() 2723 self._match(TokenType.THEN) 2724 else: 2725 expression = None 2726 2727 else_ = self._match(TokenType.ELSE) 2728 2729 if not self._match(TokenType.INTO): 2730 return None 2731 2732 return self.expression( 2733 exp.ConditionalInsert, 2734 this=self.expression( 2735 exp.Insert, 2736 this=self._parse_table(schema=True), 2737 
expression=self._parse_derived_table_values(), 2738 ), 2739 expression=expression, 2740 else_=else_, 2741 ) 2742 2743 expression = parse_conditional_insert() 2744 while expression is not None: 2745 expressions.append(expression) 2746 expression = parse_conditional_insert() 2747 2748 return self.expression( 2749 exp.MultitableInserts, 2750 kind=kind, 2751 comments=comments, 2752 expressions=expressions, 2753 source=self._parse_table(), 2754 ) 2755 2756 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2757 comments = [] 2758 hint = self._parse_hint() 2759 overwrite = self._match(TokenType.OVERWRITE) 2760 ignore = self._match(TokenType.IGNORE) 2761 local = self._match_text_seq("LOCAL") 2762 alternative = None 2763 is_function = None 2764 2765 if self._match_text_seq("DIRECTORY"): 2766 this: t.Optional[exp.Expression] = self.expression( 2767 exp.Directory, 2768 this=self._parse_var_or_string(), 2769 local=local, 2770 row_format=self._parse_row_format(match_row=True), 2771 ) 2772 else: 2773 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2774 comments += ensure_list(self._prev_comments) 2775 return self._parse_multitable_inserts(comments) 2776 2777 if self._match(TokenType.OR): 2778 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2779 2780 self._match(TokenType.INTO) 2781 comments += ensure_list(self._prev_comments) 2782 self._match(TokenType.TABLE) 2783 is_function = self._match(TokenType.FUNCTION) 2784 2785 this = ( 2786 self._parse_table(schema=True, parse_partition=True) 2787 if not is_function 2788 else self._parse_function() 2789 ) 2790 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2791 this.set("alias", self._parse_table_alias()) 2792 2793 returning = self._parse_returning() 2794 2795 return self.expression( 2796 exp.Insert, 2797 comments=comments, 2798 hint=hint, 2799 is_function=is_function, 2800 this=this, 2801 stored=self._match_text_seq("STORED") and self._parse_stored(), 2802 by_name=self._match_text_seq("BY", "NAME"), 2803 exists=self._parse_exists(), 2804 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2805 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2806 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2807 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2808 conflict=self._parse_on_conflict(), 2809 returning=returning or self._parse_returning(), 2810 overwrite=overwrite, 2811 alternative=alternative, 2812 ignore=ignore, 2813 source=self._match(TokenType.TABLE) and self._parse_table(), 2814 ) 2815 2816 def _parse_kill(self) -> exp.Kill: 2817 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2818 2819 return self.expression( 2820 exp.Kill, 2821 this=self._parse_primary(), 2822 kind=kind, 2823 ) 2824 2825 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2826 conflict = self._match_text_seq("ON", "CONFLICT") 2827 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2828 2829 if not conflict and not duplicate: 2830 return None 2831 2832 conflict_keys = None 2833 constraint = None 2834 2835 if conflict: 2836 if self._match_text_seq("ON", "CONSTRAINT"): 2837 constraint = self._parse_id_var() 2838 elif self._match(TokenType.L_PAREN): 2839 conflict_keys = self._parse_csv(self._parse_id_var) 2840 self._match_r_paren() 2841 2842 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2843 if 
self._prev.token_type == TokenType.UPDATE: 2844 self._match(TokenType.SET) 2845 expressions = self._parse_csv(self._parse_equality) 2846 else: 2847 expressions = None 2848 2849 return self.expression( 2850 exp.OnConflict, 2851 duplicate=duplicate, 2852 expressions=expressions, 2853 action=action, 2854 conflict_keys=conflict_keys, 2855 constraint=constraint, 2856 where=self._parse_where(), 2857 ) 2858 2859 def _parse_returning(self) -> t.Optional[exp.Returning]: 2860 if not self._match(TokenType.RETURNING): 2861 return None 2862 return self.expression( 2863 exp.Returning, 2864 expressions=self._parse_csv(self._parse_expression), 2865 into=self._match(TokenType.INTO) and self._parse_table_part(), 2866 ) 2867 2868 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2869 if not self._match(TokenType.FORMAT): 2870 return None 2871 return self._parse_row_format() 2872 2873 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2874 index = self._index 2875 with_ = with_ or self._match_text_seq("WITH") 2876 2877 if not self._match(TokenType.SERDE_PROPERTIES): 2878 self._retreat(index) 2879 return None 2880 return self.expression( 2881 exp.SerdeProperties, 2882 **{ # type: ignore 2883 "expressions": self._parse_wrapped_properties(), 2884 "with": with_, 2885 }, 2886 ) 2887 2888 def _parse_row_format( 2889 self, match_row: bool = False 2890 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2891 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2892 return None 2893 2894 if self._match_text_seq("SERDE"): 2895 this = self._parse_string() 2896 2897 serde_properties = self._parse_serde_properties() 2898 2899 return self.expression( 2900 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2901 ) 2902 2903 self._match_text_seq("DELIMITED") 2904 2905 kwargs = {} 2906 2907 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2908 kwargs["fields"] = self._parse_string() 2909 if self._match_text_seq("ESCAPED", "BY"): 2910 kwargs["escaped"] = self._parse_string() 2911 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2912 kwargs["collection_items"] = self._parse_string() 2913 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2914 kwargs["map_keys"] = self._parse_string() 2915 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2916 kwargs["lines"] = self._parse_string() 2917 if self._match_text_seq("NULL", "DEFINED", "AS"): 2918 kwargs["null"] = self._parse_string() 2919 2920 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2921 2922 def _parse_load(self) -> exp.LoadData | exp.Command: 2923 if self._match_text_seq("DATA"): 2924 local = self._match_text_seq("LOCAL") 2925 self._match_text_seq("INPATH") 2926 inpath = self._parse_string() 2927 overwrite = self._match(TokenType.OVERWRITE) 2928 self._match_pair(TokenType.INTO, TokenType.TABLE) 2929 2930 return self.expression( 2931 exp.LoadData, 2932 this=self._parse_table(schema=True), 2933 local=local, 2934 overwrite=overwrite, 2935 inpath=inpath, 2936 partition=self._parse_partition(), 2937 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2938 serde=self._match_text_seq("SERDE") and self._parse_string(), 2939 ) 2940 return self._parse_as_command(self._prev) 2941 2942 def _parse_delete(self) -> exp.Delete: 2943 # This handles MySQL's "Multiple-Table Syntax" 2944 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2945 tables 
= None 2946 if not self._match(TokenType.FROM, advance=False): 2947 tables = self._parse_csv(self._parse_table) or None 2948 2949 returning = self._parse_returning() 2950 2951 return self.expression( 2952 exp.Delete, 2953 tables=tables, 2954 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2955 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2956 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2957 where=self._parse_where(), 2958 returning=returning or self._parse_returning(), 2959 limit=self._parse_limit(), 2960 ) 2961 2962 def _parse_update(self) -> exp.Update: 2963 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2964 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2965 returning = self._parse_returning() 2966 return self.expression( 2967 exp.Update, 2968 **{ # type: ignore 2969 "this": this, 2970 "expressions": expressions, 2971 "from": self._parse_from(joins=True), 2972 "where": self._parse_where(), 2973 "returning": returning or self._parse_returning(), 2974 "order": self._parse_order(), 2975 "limit": self._parse_limit(), 2976 }, 2977 ) 2978 2979 def _parse_use(self) -> exp.Use: 2980 return self.expression( 2981 exp.Use, 2982 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 2983 this=self._parse_table(schema=False), 2984 ) 2985 2986 def _parse_uncache(self) -> exp.Uncache: 2987 if not self._match(TokenType.TABLE): 2988 self.raise_error("Expecting TABLE after UNCACHE") 2989 2990 return self.expression( 2991 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2992 ) 2993 2994 def _parse_cache(self) -> exp.Cache: 2995 lazy = self._match_text_seq("LAZY") 2996 self._match(TokenType.TABLE) 2997 table = self._parse_table(schema=True) 2998 2999 options = [] 3000 if self._match_text_seq("OPTIONS"): 3001 self._match_l_paren() 3002 k = self._parse_string() 3003 self._match(TokenType.EQ) 3004 v = self._parse_string() 3005 options = [k, v] 3006 self._match_r_paren() 3007 3008 self._match(TokenType.ALIAS) 3009 return self.expression( 3010 exp.Cache, 3011 this=table, 3012 lazy=lazy, 3013 options=options, 3014 expression=self._parse_select(nested=True), 3015 ) 3016 3017 def _parse_partition(self) -> t.Optional[exp.Partition]: 3018 if not self._match_texts(self.PARTITION_KEYWORDS): 3019 return None 3020 3021 return self.expression( 3022 exp.Partition, 3023 subpartition=self._prev.text.upper() == "SUBPARTITION", 3024 expressions=self._parse_wrapped_csv(self._parse_assignment), 3025 ) 3026 3027 def _parse_value(self) -> t.Optional[exp.Tuple]: 3028 def _parse_value_expression() -> t.Optional[exp.Expression]: 3029 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3030 return exp.var(self._prev.text.upper()) 3031 return self._parse_expression() 3032 3033 if self._match(TokenType.L_PAREN): 3034 expressions = self._parse_csv(_parse_value_expression) 3035 self._match_r_paren() 3036 return self.expression(exp.Tuple, expressions=expressions) 3037 3038 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
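        # e.g. in such dialects INSERT INTO t VALUES 1, 2 is shorthand for
        # VALUES (1), (2): each bare expression is wrapped in its own
        # single-column exp.Tuple below.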
3039 expression = self._parse_expression() 3040 if expression: 3041 return self.expression(exp.Tuple, expressions=[expression]) 3042 return None 3043 3044 def _parse_projections(self) -> t.List[exp.Expression]: 3045 return self._parse_expressions() 3046 3047 def _parse_select( 3048 self, 3049 nested: bool = False, 3050 table: bool = False, 3051 parse_subquery_alias: bool = True, 3052 parse_set_operation: bool = True, 3053 ) -> t.Optional[exp.Expression]: 3054 cte = self._parse_with() 3055 3056 if cte: 3057 this = self._parse_statement() 3058 3059 if not this: 3060 self.raise_error("Failed to parse any statement following CTE") 3061 return cte 3062 3063 if "with" in this.arg_types: 3064 this.set("with", cte) 3065 else: 3066 self.raise_error(f"{this.key} does not support CTE") 3067 this = cte 3068 3069 return this 3070 3071 # duckdb supports leading with FROM x 3072 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3073 3074 if self._match(TokenType.SELECT): 3075 comments = self._prev_comments 3076 3077 hint = self._parse_hint() 3078 3079 if self._next and not self._next.token_type == TokenType.DOT: 3080 all_ = self._match(TokenType.ALL) 3081 distinct = self._match_set(self.DISTINCT_TOKENS) 3082 else: 3083 all_, distinct = None, None 3084 3085 kind = ( 3086 self._match(TokenType.ALIAS) 3087 and self._match_texts(("STRUCT", "VALUE")) 3088 and self._prev.text.upper() 3089 ) 3090 3091 if distinct: 3092 distinct = self.expression( 3093 exp.Distinct, 3094 on=self._parse_value() if self._match(TokenType.ON) else None, 3095 ) 3096 3097 if all_ and distinct: 3098 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3099 3100 operation_modifiers = [] 3101 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3102 operation_modifiers.append(exp.var(self._prev.text.upper())) 3103 3104 limit = self._parse_limit(top=True) 3105 projections = self._parse_projections() 3106 3107 this = self.expression( 3108 exp.Select, 3109 kind=kind, 3110 hint=hint, 3111 distinct=distinct, 3112 expressions=projections, 3113 limit=limit, 3114 operation_modifiers=operation_modifiers or None, 3115 ) 3116 this.comments = comments 3117 3118 into = self._parse_into() 3119 if into: 3120 this.set("into", into) 3121 3122 if not from_: 3123 from_ = self._parse_from() 3124 3125 if from_: 3126 this.set("from", from_) 3127 3128 this = self._parse_query_modifiers(this) 3129 elif (table or nested) and self._match(TokenType.L_PAREN): 3130 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3131 this = self._parse_simplified_pivot( 3132 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3133 ) 3134 elif self._match(TokenType.FROM): 3135 from_ = self._parse_from(skip_from_token=True) 3136 # Support parentheses for duckdb FROM-first syntax 3137 select = self._parse_select() 3138 if select: 3139 select.set("from", from_) 3140 this = select 3141 else: 3142 this = exp.select("*").from_(t.cast(exp.From, from_)) 3143 else: 3144 this = ( 3145 self._parse_table() 3146 if table 3147 else self._parse_select(nested=True, parse_set_operation=False) 3148 ) 3149 3150 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3151 # in case a modifier (e.g. 
join) is following 3152 if table and isinstance(this, exp.Values) and this.alias: 3153 alias = this.args["alias"].pop() 3154 this = exp.Table(this=this, alias=alias) 3155 3156 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3157 3158 self._match_r_paren() 3159 3160 # We return early here so that the UNION isn't attached to the subquery by the 3161 # following call to _parse_set_operations, but instead becomes the parent node 3162 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3163 elif self._match(TokenType.VALUES, advance=False): 3164 this = self._parse_derived_table_values() 3165 elif from_: 3166 this = exp.select("*").from_(from_.this, copy=False) 3167 elif self._match(TokenType.SUMMARIZE): 3168 table = self._match(TokenType.TABLE) 3169 this = self._parse_select() or self._parse_string() or self._parse_table() 3170 return self.expression(exp.Summarize, this=this, table=table) 3171 elif self._match(TokenType.DESCRIBE): 3172 this = self._parse_describe() 3173 elif self._match_text_seq("STREAM"): 3174 this = self._parse_function() 3175 if this: 3176 this = self.expression(exp.Stream, this=this) 3177 else: 3178 self._retreat(self._index - 1) 3179 else: 3180 this = None 3181 3182 return self._parse_set_operations(this) if parse_set_operation else this 3183 3184 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3185 self._match_text_seq("SEARCH") 3186 3187 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3188 3189 if not kind: 3190 return None 3191 3192 self._match_text_seq("FIRST", "BY") 3193 3194 return self.expression( 3195 exp.RecursiveWithSearch, 3196 kind=kind, 3197 this=self._parse_id_var(), 3198 expression=self._match_text_seq("SET") and self._parse_id_var(), 3199 using=self._match_text_seq("USING") and self._parse_id_var(), 3200 ) 3201 3202 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3203 if not skip_with_token and not self._match(TokenType.WITH): 3204 return None 3205 3206 comments = self._prev_comments 3207 recursive = self._match(TokenType.RECURSIVE) 3208 3209 last_comments = None 3210 expressions = [] 3211 while True: 3212 cte = self._parse_cte() 3213 if isinstance(cte, exp.CTE): 3214 expressions.append(cte) 3215 if last_comments: 3216 cte.add_comments(last_comments) 3217 3218 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3219 break 3220 else: 3221 self._match(TokenType.WITH) 3222 3223 last_comments = self._prev_comments 3224 3225 return self.expression( 3226 exp.With, 3227 comments=comments, 3228 expressions=expressions, 3229 recursive=recursive, 3230 search=self._parse_recursive_with_search(), 3231 ) 3232 3233 def _parse_cte(self) -> t.Optional[exp.CTE]: 3234 index = self._index 3235 3236 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3237 if not alias or not alias.this: 3238 self.raise_error("Expected CTE to have alias") 3239 3240 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3241 self._retreat(index) 3242 return None 3243 3244 comments = self._prev_comments 3245 3246 if self._match_text_seq("NOT", "MATERIALIZED"): 3247 materialized = False 3248 elif self._match_text_seq("MATERIALIZED"): 3249 materialized = True 3250 else: 3251 materialized = None 3252 3253 cte = self.expression( 3254 exp.CTE, 3255 this=self._parse_wrapped(self._parse_statement), 3256 alias=alias, 3257 materialized=materialized, 3258 comments=comments, 3259 ) 3260 3261 if isinstance(cte.this, exp.Values): 3262 
cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3263 3264 return cte 3265 3266 def _parse_table_alias( 3267 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3268 ) -> t.Optional[exp.TableAlias]: 3269 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3270 # so this section tries to parse the clause version and if it fails, it treats the token 3271 # as an identifier (alias) 3272 if self._can_parse_limit_or_offset(): 3273 return None 3274 3275 any_token = self._match(TokenType.ALIAS) 3276 alias = ( 3277 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3278 or self._parse_string_as_identifier() 3279 ) 3280 3281 index = self._index 3282 if self._match(TokenType.L_PAREN): 3283 columns = self._parse_csv(self._parse_function_parameter) 3284 self._match_r_paren() if columns else self._retreat(index) 3285 else: 3286 columns = None 3287 3288 if not alias and not columns: 3289 return None 3290 3291 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3292 3293 # We bubble up comments from the Identifier to the TableAlias 3294 if isinstance(alias, exp.Identifier): 3295 table_alias.add_comments(alias.pop_comments()) 3296 3297 return table_alias 3298 3299 def _parse_subquery( 3300 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3301 ) -> t.Optional[exp.Subquery]: 3302 if not this: 3303 return None 3304 3305 return self.expression( 3306 exp.Subquery, 3307 this=this, 3308 pivots=self._parse_pivots(), 3309 alias=self._parse_table_alias() if parse_alias else None, 3310 sample=self._parse_table_sample(), 3311 ) 3312 3313 def _implicit_unnests_to_explicit(self, this: E) -> E: 3314 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3315 3316 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3317 for i, join in enumerate(this.args.get("joins") or []): 3318 table = join.this 3319 normalized_table = table.copy() 3320 normalized_table.meta["maybe_column"] = True 3321 normalized_table = _norm(normalized_table, dialect=self.dialect) 3322 3323 if isinstance(table, exp.Table) and not join.args.get("on"): 3324 if normalized_table.parts[0].name in refs: 3325 table_as_column = table.to_column() 3326 unnest = exp.Unnest(expressions=[table_as_column]) 3327 3328 # Table.to_column creates a parent Alias node that we want to convert to 3329 # a TableAlias and attach to the Unnest, so it matches the parser's output 3330 if isinstance(table.args.get("alias"), exp.TableAlias): 3331 table_as_column.replace(table_as_column.this) 3332 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3333 3334 table.replace(unnest) 3335 3336 refs.add(normalized_table.alias_or_name) 3337 3338 return this 3339 3340 def _parse_query_modifiers( 3341 self, this: t.Optional[exp.Expression] 3342 ) -> t.Optional[exp.Expression]: 3343 if isinstance(this, (exp.Query, exp.Table)): 3344 for join in self._parse_joins(): 3345 this.append("joins", join) 3346 for lateral in iter(self._parse_lateral, None): 3347 this.append("laterals", lateral) 3348 3349 while True: 3350 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3351 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3352 key, expression = parser(self) 3353 3354 if expression: 3355 this.set(key, expression) 3356 if key == "limit": 3357 offset = expression.args.pop("offset", None) 3358 3359 if offset: 3360 offset = 
exp.Offset(expression=offset) 3361 this.set("offset", offset) 3362 3363 limit_by_expressions = expression.expressions 3364 expression.set("expressions", None) 3365 offset.set("expressions", limit_by_expressions) 3366 continue 3367 break 3368 3369 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3370 this = self._implicit_unnests_to_explicit(this) 3371 3372 return this 3373 3374 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3375 start = self._curr 3376 while self._curr: 3377 self._advance() 3378 3379 end = self._tokens[self._index - 1] 3380 return exp.Hint(expressions=[self._find_sql(start, end)]) 3381 3382 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3383 return self._parse_function_call() 3384 3385 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3386 start_index = self._index 3387 should_fallback_to_string = False 3388 3389 hints = [] 3390 try: 3391 for hint in iter( 3392 lambda: self._parse_csv( 3393 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3394 ), 3395 [], 3396 ): 3397 hints.extend(hint) 3398 except ParseError: 3399 should_fallback_to_string = True 3400 3401 if should_fallback_to_string or self._curr: 3402 self._retreat(start_index) 3403 return self._parse_hint_fallback_to_string() 3404 3405 return self.expression(exp.Hint, expressions=hints) 3406 3407 def _parse_hint(self) -> t.Optional[exp.Hint]: 3408 if self._match(TokenType.HINT) and self._prev_comments: 3409 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3410 3411 return None 3412 3413 def _parse_into(self) -> t.Optional[exp.Into]: 3414 if not self._match(TokenType.INTO): 3415 return None 3416 3417 temp = self._match(TokenType.TEMPORARY) 3418 unlogged = self._match_text_seq("UNLOGGED") 3419 self._match(TokenType.TABLE) 3420 3421 return self.expression( 3422 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3423 ) 3424 3425 def _parse_from( 3426 self, joins: bool = False, skip_from_token: bool = False 3427 ) -> t.Optional[exp.From]: 3428 if not skip_from_token and not self._match(TokenType.FROM): 3429 return None 3430 3431 return self.expression( 3432 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3433 ) 3434 3435 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3436 return self.expression( 3437 exp.MatchRecognizeMeasure, 3438 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3439 this=self._parse_expression(), 3440 ) 3441 3442 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3443 if not self._match(TokenType.MATCH_RECOGNIZE): 3444 return None 3445 3446 self._match_l_paren() 3447 3448 partition = self._parse_partition_by() 3449 order = self._parse_order() 3450 3451 measures = ( 3452 self._parse_csv(self._parse_match_recognize_measure) 3453 if self._match_text_seq("MEASURES") 3454 else None 3455 ) 3456 3457 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3458 rows = exp.var("ONE ROW PER MATCH") 3459 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3460 text = "ALL ROWS PER MATCH" 3461 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3462 text += " SHOW EMPTY MATCHES" 3463 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3464 text += " OMIT EMPTY MATCHES" 3465 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3466 text += " WITH UNMATCHED ROWS" 3467 rows = exp.var(text) 3468 else: 3469 rows = None 3470 3471 if self._match_text_seq("AFTER", 
"MATCH", "SKIP"): 3472 text = "AFTER MATCH SKIP" 3473 if self._match_text_seq("PAST", "LAST", "ROW"): 3474 text += " PAST LAST ROW" 3475 elif self._match_text_seq("TO", "NEXT", "ROW"): 3476 text += " TO NEXT ROW" 3477 elif self._match_text_seq("TO", "FIRST"): 3478 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3479 elif self._match_text_seq("TO", "LAST"): 3480 text += f" TO LAST {self._advance_any().text}" # type: ignore 3481 after = exp.var(text) 3482 else: 3483 after = None 3484 3485 if self._match_text_seq("PATTERN"): 3486 self._match_l_paren() 3487 3488 if not self._curr: 3489 self.raise_error("Expecting )", self._curr) 3490 3491 paren = 1 3492 start = self._curr 3493 3494 while self._curr and paren > 0: 3495 if self._curr.token_type == TokenType.L_PAREN: 3496 paren += 1 3497 if self._curr.token_type == TokenType.R_PAREN: 3498 paren -= 1 3499 3500 end = self._prev 3501 self._advance() 3502 3503 if paren > 0: 3504 self.raise_error("Expecting )", self._curr) 3505 3506 pattern = exp.var(self._find_sql(start, end)) 3507 else: 3508 pattern = None 3509 3510 define = ( 3511 self._parse_csv(self._parse_name_as_expression) 3512 if self._match_text_seq("DEFINE") 3513 else None 3514 ) 3515 3516 self._match_r_paren() 3517 3518 return self.expression( 3519 exp.MatchRecognize, 3520 partition_by=partition, 3521 order=order, 3522 measures=measures, 3523 rows=rows, 3524 after=after, 3525 pattern=pattern, 3526 define=define, 3527 alias=self._parse_table_alias(), 3528 ) 3529 3530 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3531 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3532 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3533 cross_apply = False 3534 3535 if cross_apply is not None: 3536 this = self._parse_select(table=True) 3537 view = None 3538 outer = None 3539 elif self._match(TokenType.LATERAL): 3540 this = self._parse_select(table=True) 3541 view = self._match(TokenType.VIEW) 3542 outer = self._match(TokenType.OUTER) 3543 else: 3544 return None 3545 3546 if not this: 3547 this = ( 3548 self._parse_unnest() 3549 or self._parse_function() 3550 or self._parse_id_var(any_token=False) 3551 ) 3552 3553 while self._match(TokenType.DOT): 3554 this = exp.Dot( 3555 this=this, 3556 expression=self._parse_function() or self._parse_id_var(any_token=False), 3557 ) 3558 3559 if view: 3560 table = self._parse_id_var(any_token=False) 3561 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3562 table_alias: t.Optional[exp.TableAlias] = self.expression( 3563 exp.TableAlias, this=table, columns=columns 3564 ) 3565 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3566 # We move the alias from the lateral's child node to the lateral itself 3567 table_alias = this.args["alias"].pop() 3568 else: 3569 table_alias = self._parse_table_alias() 3570 3571 return self.expression( 3572 exp.Lateral, 3573 this=this, 3574 view=view, 3575 outer=outer, 3576 alias=table_alias, 3577 cross_apply=cross_apply, 3578 ) 3579 3580 def _parse_join_parts( 3581 self, 3582 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3583 return ( 3584 self._match_set(self.JOIN_METHODS) and self._prev, 3585 self._match_set(self.JOIN_SIDES) and self._prev, 3586 self._match_set(self.JOIN_KINDS) and self._prev, 3587 ) 3588 3589 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3590 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3591 this = self._parse_column() 3592 if isinstance(this, 
exp.Column): 3593 return this.this 3594 return this 3595 3596 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3597 3598 def _parse_join( 3599 self, skip_join_token: bool = False, parse_bracket: bool = False 3600 ) -> t.Optional[exp.Join]: 3601 if self._match(TokenType.COMMA): 3602 return self.expression(exp.Join, this=self._parse_table()) 3603 3604 index = self._index 3605 method, side, kind = self._parse_join_parts() 3606 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3607 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3608 3609 if not skip_join_token and not join: 3610 self._retreat(index) 3611 kind = None 3612 method = None 3613 side = None 3614 3615 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3616 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3617 3618 if not skip_join_token and not join and not outer_apply and not cross_apply: 3619 return None 3620 3621 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3622 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3623 kwargs["expressions"] = self._parse_csv( 3624 lambda: self._parse_table(parse_bracket=parse_bracket) 3625 ) 3626 3627 if method: 3628 kwargs["method"] = method.text 3629 if side: 3630 kwargs["side"] = side.text 3631 if kind: 3632 kwargs["kind"] = kind.text 3633 if hint: 3634 kwargs["hint"] = hint 3635 3636 if self._match(TokenType.MATCH_CONDITION): 3637 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3638 3639 if self._match(TokenType.ON): 3640 kwargs["on"] = self._parse_assignment() 3641 elif self._match(TokenType.USING): 3642 kwargs["using"] = self._parse_using_identifiers() 3643 elif ( 3644 not (outer_apply or cross_apply) 3645 and not isinstance(kwargs["this"], exp.Unnest) 3646 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3647 ): 3648 index = self._index 3649 joins: t.Optional[list] = list(self._parse_joins()) 3650 3651 if joins and self._match(TokenType.ON): 3652 kwargs["on"] = self._parse_assignment() 3653 elif joins and self._match(TokenType.USING): 3654 kwargs["using"] = self._parse_using_identifiers() 3655 else: 3656 joins = None 3657 self._retreat(index) 3658 3659 kwargs["this"].set("joins", joins if joins else None) 3660 3661 comments = [c for token in (method, side, kind) if token for c in token.comments] 3662 return self.expression(exp.Join, comments=comments, **kwargs) 3663 3664 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3665 this = self._parse_assignment() 3666 3667 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3668 return this 3669 3670 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3671 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3672 3673 return this 3674 3675 def _parse_index_params(self) -> exp.IndexParameters: 3676 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3677 3678 if self._match(TokenType.L_PAREN, advance=False): 3679 columns = self._parse_wrapped_csv(self._parse_with_operator) 3680 else: 3681 columns = None 3682 3683 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3684 partition_by = self._parse_partition_by() 3685 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3686 tablespace = ( 3687 self._parse_var(any_token=True) 3688 if 
self._match_text_seq("USING", "INDEX", "TABLESPACE") 3689 else None 3690 ) 3691 where = self._parse_where() 3692 3693 on = self._parse_field() if self._match(TokenType.ON) else None 3694 3695 return self.expression( 3696 exp.IndexParameters, 3697 using=using, 3698 columns=columns, 3699 include=include, 3700 partition_by=partition_by, 3701 where=where, 3702 with_storage=with_storage, 3703 tablespace=tablespace, 3704 on=on, 3705 ) 3706 3707 def _parse_index( 3708 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3709 ) -> t.Optional[exp.Index]: 3710 if index or anonymous: 3711 unique = None 3712 primary = None 3713 amp = None 3714 3715 self._match(TokenType.ON) 3716 self._match(TokenType.TABLE) # hive 3717 table = self._parse_table_parts(schema=True) 3718 else: 3719 unique = self._match(TokenType.UNIQUE) 3720 primary = self._match_text_seq("PRIMARY") 3721 amp = self._match_text_seq("AMP") 3722 3723 if not self._match(TokenType.INDEX): 3724 return None 3725 3726 index = self._parse_id_var() 3727 table = None 3728 3729 params = self._parse_index_params() 3730 3731 return self.expression( 3732 exp.Index, 3733 this=index, 3734 table=table, 3735 unique=unique, 3736 primary=primary, 3737 amp=amp, 3738 params=params, 3739 ) 3740 3741 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3742 hints: t.List[exp.Expression] = [] 3743 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3744 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3745 hints.append( 3746 self.expression( 3747 exp.WithTableHint, 3748 expressions=self._parse_csv( 3749 lambda: self._parse_function() or self._parse_var(any_token=True) 3750 ), 3751 ) 3752 ) 3753 self._match_r_paren() 3754 else: 3755 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3756 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3757 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3758 3759 self._match_set((TokenType.INDEX, TokenType.KEY)) 3760 if self._match(TokenType.FOR): 3761 hint.set("target", self._advance_any() and self._prev.text.upper()) 3762 3763 hint.set("expressions", self._parse_wrapped_id_vars()) 3764 hints.append(hint) 3765 3766 return hints or None 3767 3768 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3769 return ( 3770 (not schema and self._parse_function(optional_parens=False)) 3771 or self._parse_id_var(any_token=False) 3772 or self._parse_string_as_identifier() 3773 or self._parse_placeholder() 3774 ) 3775 3776 def _parse_table_parts( 3777 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3778 ) -> exp.Table: 3779 catalog = None 3780 db = None 3781 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3782 3783 while self._match(TokenType.DOT): 3784 if catalog: 3785 # This allows nesting the table in arbitrarily many dot expressions if needed 3786 table = self.expression( 3787 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3788 ) 3789 else: 3790 catalog = db 3791 db = table 3792 # "" used for tsql FROM a..b case 3793 table = self._parse_table_part(schema=schema) or "" 3794 3795 if ( 3796 wildcard 3797 and self._is_connected() 3798 and (isinstance(table, exp.Identifier) or not table) 3799 and self._match(TokenType.STAR) 3800 ): 3801 if isinstance(table, exp.Identifier): 3802 table.args["this"] += "*" 3803 else: 3804 table = exp.Identifier(this="*") 3805 3806 # We bubble up comments from the Identifier to the Table 
3807 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3808 3809 if is_db_reference: 3810 catalog = db 3811 db = table 3812 table = None 3813 3814 if not table and not is_db_reference: 3815 self.raise_error(f"Expected table name but got {self._curr}") 3816 if not db and is_db_reference: 3817 self.raise_error(f"Expected database name but got {self._curr}") 3818 3819 table = self.expression( 3820 exp.Table, 3821 comments=comments, 3822 this=table, 3823 db=db, 3824 catalog=catalog, 3825 ) 3826 3827 changes = self._parse_changes() 3828 if changes: 3829 table.set("changes", changes) 3830 3831 at_before = self._parse_historical_data() 3832 if at_before: 3833 table.set("when", at_before) 3834 3835 pivots = self._parse_pivots() 3836 if pivots: 3837 table.set("pivots", pivots) 3838 3839 return table 3840 3841 def _parse_table( 3842 self, 3843 schema: bool = False, 3844 joins: bool = False, 3845 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3846 parse_bracket: bool = False, 3847 is_db_reference: bool = False, 3848 parse_partition: bool = False, 3849 ) -> t.Optional[exp.Expression]: 3850 lateral = self._parse_lateral() 3851 if lateral: 3852 return lateral 3853 3854 unnest = self._parse_unnest() 3855 if unnest: 3856 return unnest 3857 3858 values = self._parse_derived_table_values() 3859 if values: 3860 return values 3861 3862 subquery = self._parse_select(table=True) 3863 if subquery: 3864 if not subquery.args.get("pivots"): 3865 subquery.set("pivots", self._parse_pivots()) 3866 return subquery 3867 3868 bracket = parse_bracket and self._parse_bracket(None) 3869 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3870 3871 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3872 self._parse_table 3873 ) 3874 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3875 3876 only = self._match(TokenType.ONLY) 3877 3878 this = t.cast( 3879 exp.Expression, 3880 bracket 3881 or rows_from 3882 or self._parse_bracket( 3883 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3884 ), 3885 ) 3886 3887 if only: 3888 this.set("only", only) 3889 3890 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3891 self._match_text_seq("*") 3892 3893 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3894 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3895 this.set("partition", self._parse_partition()) 3896 3897 if schema: 3898 return self._parse_schema(this=this) 3899 3900 version = self._parse_version() 3901 3902 if version: 3903 this.set("version", version) 3904 3905 if self.dialect.ALIAS_POST_TABLESAMPLE: 3906 this.set("sample", self._parse_table_sample()) 3907 3908 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3909 if alias: 3910 this.set("alias", alias) 3911 3912 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3913 return self.expression( 3914 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3915 ) 3916 3917 this.set("hints", self._parse_table_hints()) 3918 3919 if not this.args.get("pivots"): 3920 this.set("pivots", self._parse_pivots()) 3921 3922 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3923 this.set("sample", self._parse_table_sample()) 3924 3925 if joins: 3926 for join in self._parse_joins(): 3927 this.append("joins", join) 3928 3929 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3930 
this.set("ordinality", True) 3931 this.set("alias", self._parse_table_alias()) 3932 3933 return this 3934 3935 def _parse_version(self) -> t.Optional[exp.Version]: 3936 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3937 this = "TIMESTAMP" 3938 elif self._match(TokenType.VERSION_SNAPSHOT): 3939 this = "VERSION" 3940 else: 3941 return None 3942 3943 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3944 kind = self._prev.text.upper() 3945 start = self._parse_bitwise() 3946 self._match_texts(("TO", "AND")) 3947 end = self._parse_bitwise() 3948 expression: t.Optional[exp.Expression] = self.expression( 3949 exp.Tuple, expressions=[start, end] 3950 ) 3951 elif self._match_text_seq("CONTAINED", "IN"): 3952 kind = "CONTAINED IN" 3953 expression = self.expression( 3954 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3955 ) 3956 elif self._match(TokenType.ALL): 3957 kind = "ALL" 3958 expression = None 3959 else: 3960 self._match_text_seq("AS", "OF") 3961 kind = "AS OF" 3962 expression = self._parse_type() 3963 3964 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3965 3966 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3967 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3968 index = self._index 3969 historical_data = None 3970 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3971 this = self._prev.text.upper() 3972 kind = ( 3973 self._match(TokenType.L_PAREN) 3974 and self._match_texts(self.HISTORICAL_DATA_KIND) 3975 and self._prev.text.upper() 3976 ) 3977 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3978 3979 if expression: 3980 self._match_r_paren() 3981 historical_data = self.expression( 3982 exp.HistoricalData, this=this, kind=kind, expression=expression 3983 ) 3984 else: 3985 self._retreat(index) 3986 3987 return historical_data 3988 3989 def _parse_changes(self) -> t.Optional[exp.Changes]: 3990 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3991 return None 3992 3993 information = self._parse_var(any_token=True) 3994 self._match_r_paren() 3995 3996 return self.expression( 3997 exp.Changes, 3998 information=information, 3999 at_before=self._parse_historical_data(), 4000 end=self._parse_historical_data(), 4001 ) 4002 4003 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4004 if not self._match(TokenType.UNNEST): 4005 return None 4006 4007 expressions = self._parse_wrapped_csv(self._parse_equality) 4008 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4009 4010 alias = self._parse_table_alias() if with_alias else None 4011 4012 if alias: 4013 if self.dialect.UNNEST_COLUMN_ONLY: 4014 if alias.args.get("columns"): 4015 self.raise_error("Unexpected extra column alias in unnest.") 4016 4017 alias.set("columns", [alias.this]) 4018 alias.set("this", None) 4019 4020 columns = alias.args.get("columns") or [] 4021 if offset and len(expressions) < len(columns): 4022 offset = columns.pop() 4023 4024 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4025 self._match(TokenType.ALIAS) 4026 offset = self._parse_id_var( 4027 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4028 ) or exp.to_identifier("offset") 4029 4030 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4031 4032 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4033 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4034 if not is_derived and not ( 4035 # ClickHouse's 
`FORMAT Values` is equivalent to `VALUES` 4036 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4037 ): 4038 return None 4039 4040 expressions = self._parse_csv(self._parse_value) 4041 alias = self._parse_table_alias() 4042 4043 if is_derived: 4044 self._match_r_paren() 4045 4046 return self.expression( 4047 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4048 ) 4049 4050 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4051 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4052 as_modifier and self._match_text_seq("USING", "SAMPLE") 4053 ): 4054 return None 4055 4056 bucket_numerator = None 4057 bucket_denominator = None 4058 bucket_field = None 4059 percent = None 4060 size = None 4061 seed = None 4062 4063 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4064 matched_l_paren = self._match(TokenType.L_PAREN) 4065 4066 if self.TABLESAMPLE_CSV: 4067 num = None 4068 expressions = self._parse_csv(self._parse_primary) 4069 else: 4070 expressions = None 4071 num = ( 4072 self._parse_factor() 4073 if self._match(TokenType.NUMBER, advance=False) 4074 else self._parse_primary() or self._parse_placeholder() 4075 ) 4076 4077 if self._match_text_seq("BUCKET"): 4078 bucket_numerator = self._parse_number() 4079 self._match_text_seq("OUT", "OF") 4080 bucket_denominator = self._parse_number() 4081 self._match(TokenType.ON) 4082 bucket_field = self._parse_field() 4083 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4084 percent = num 4085 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4086 size = num 4087 else: 4088 percent = num 4089 4090 if matched_l_paren: 4091 self._match_r_paren() 4092 4093 if self._match(TokenType.L_PAREN): 4094 method = self._parse_var(upper=True) 4095 seed = self._match(TokenType.COMMA) and self._parse_number() 4096 self._match_r_paren() 4097 elif self._match_texts(("SEED", "REPEATABLE")): 4098 seed = self._parse_wrapped(self._parse_number) 4099 4100 if not method and self.DEFAULT_SAMPLING_METHOD: 4101 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4102 4103 return self.expression( 4104 exp.TableSample, 4105 expressions=expressions, 4106 method=method, 4107 bucket_numerator=bucket_numerator, 4108 bucket_denominator=bucket_denominator, 4109 bucket_field=bucket_field, 4110 percent=percent, 4111 size=size, 4112 seed=seed, 4113 ) 4114 4115 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4116 return list(iter(self._parse_pivot, None)) or None 4117 4118 def _parse_joins(self) -> t.Iterator[exp.Join]: 4119 return iter(self._parse_join, None) 4120 4121 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4122 if not self._match(TokenType.INTO): 4123 return None 4124 4125 return self.expression( 4126 exp.UnpivotColumns, 4127 this=self._match_text_seq("NAME") and self._parse_column(), 4128 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4129 ) 4130 4131 # https://duckdb.org/docs/sql/statements/pivot 4132 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4133 def _parse_on() -> t.Optional[exp.Expression]: 4134 this = self._parse_bitwise() 4135 4136 if self._match(TokenType.IN): 4137 # PIVOT ... ON col IN (row_val1, row_val2) 4138 return self._parse_in(this) 4139 if self._match(TokenType.ALIAS, advance=False): 4140 # UNPIVOT ... 
ON (col1, col2, col3) AS row_val 4141 return self._parse_alias(this) 4142 4143 return this 4144 4145 this = self._parse_table() 4146 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4147 into = self._parse_unpivot_columns() 4148 using = self._match(TokenType.USING) and self._parse_csv( 4149 lambda: self._parse_alias(self._parse_function()) 4150 ) 4151 group = self._parse_group() 4152 4153 return self.expression( 4154 exp.Pivot, 4155 this=this, 4156 expressions=expressions, 4157 using=using, 4158 group=group, 4159 unpivot=is_unpivot, 4160 into=into, 4161 ) 4162 4163 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4164 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4165 this = self._parse_select_or_expression() 4166 4167 self._match(TokenType.ALIAS) 4168 alias = self._parse_bitwise() 4169 if alias: 4170 if isinstance(alias, exp.Column) and not alias.db: 4171 alias = alias.this 4172 return self.expression(exp.PivotAlias, this=this, alias=alias) 4173 4174 return this 4175 4176 value = self._parse_column() 4177 4178 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4179 self.raise_error("Expecting IN (") 4180 4181 if self._match(TokenType.ANY): 4182 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4183 else: 4184 exprs = self._parse_csv(_parse_aliased_expression) 4185 4186 self._match_r_paren() 4187 return self.expression(exp.In, this=value, expressions=exprs) 4188 4189 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4190 index = self._index 4191 include_nulls = None 4192 4193 if self._match(TokenType.PIVOT): 4194 unpivot = False 4195 elif self._match(TokenType.UNPIVOT): 4196 unpivot = True 4197 4198 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4199 if self._match_text_seq("INCLUDE", "NULLS"): 4200 include_nulls = True 4201 elif self._match_text_seq("EXCLUDE", "NULLS"): 4202 include_nulls = False 4203 else: 4204 return None 4205 4206 expressions = [] 4207 4208 if not self._match(TokenType.L_PAREN): 4209 self._retreat(index) 4210 return None 4211 4212 if unpivot: 4213 expressions = self._parse_csv(self._parse_column) 4214 else: 4215 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4216 4217 if not expressions: 4218 self.raise_error("Failed to parse PIVOT's aggregation list") 4219 4220 if not self._match(TokenType.FOR): 4221 self.raise_error("Expecting FOR") 4222 4223 field = self._parse_pivot_in() 4224 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4225 self._parse_bitwise 4226 ) 4227 4228 self._match_r_paren() 4229 4230 pivot = self.expression( 4231 exp.Pivot, 4232 expressions=expressions, 4233 field=field, 4234 unpivot=unpivot, 4235 include_nulls=include_nulls, 4236 default_on_null=default_on_null, 4237 ) 4238 4239 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4240 pivot.set("alias", self._parse_table_alias()) 4241 4242 if not unpivot: 4243 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4244 4245 columns: t.List[exp.Expression] = [] 4246 pivot_field_expressions = pivot.args["field"].expressions 4247 4248 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
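# Illustrative sketch, not part of the original source: for a pivot such as
# PIVOT(SUM(x) AS s FOR col IN ('a', 'b')), `names` is ['s'] and the IN values
# contribute the field names 'a' and 'b', so the loop below yields columns like
# a_s and b_s (s_a / s_b when PREFIXED_PIVOT_COLUMNS is set; the quoted SQL of
# each IN value is used instead when IDENTIFY_PIVOT_STRINGS is set).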
4249 if not isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4250 for fld in pivot_field_expressions: 4251 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4252 for name in names: 4253 if self.PREFIXED_PIVOT_COLUMNS: 4254 name = f"{name}_{field_name}" if name else field_name 4255 else: 4256 name = f"{field_name}_{name}" if name else field_name 4257 4258 columns.append(exp.to_identifier(name)) 4259 4260 pivot.set("columns", columns) 4261 4262 return pivot 4263 4264 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4265 return [agg.alias for agg in aggregations] 4266 4267 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4268 if not skip_where_token and not self._match(TokenType.PREWHERE): 4269 return None 4270 4271 return self.expression( 4272 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4273 ) 4274 4275 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4276 if not skip_where_token and not self._match(TokenType.WHERE): 4277 return None 4278 4279 return self.expression( 4280 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4281 ) 4282 4283 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4284 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4285 return None 4286 4287 elements: t.Dict[str, t.Any] = defaultdict(list) 4288 4289 if self._match(TokenType.ALL): 4290 elements["all"] = True 4291 elif self._match(TokenType.DISTINCT): 4292 elements["all"] = False 4293 4294 while True: 4295 index = self._index 4296 4297 elements["expressions"].extend( 4298 self._parse_csv( 4299 lambda: None 4300 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4301 else self._parse_assignment() 4302 ) 4303 ) 4304 4305 before_with_index = self._index 4306 with_prefix = self._match(TokenType.WITH) 4307 4308 if self._match(TokenType.ROLLUP): 4309 elements["rollup"].append( 4310 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4311 ) 4312 elif self._match(TokenType.CUBE): 4313 elements["cube"].append( 4314 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4315 ) 4316 elif self._match(TokenType.GROUPING_SETS): 4317 elements["grouping_sets"].append( 4318 self.expression( 4319 exp.GroupingSets, 4320 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4321 ) 4322 ) 4323 elif self._match_text_seq("TOTALS"): 4324 elements["totals"] = True # type: ignore 4325 4326 if before_with_index <= self._index <= before_with_index + 1: 4327 self._retreat(before_with_index) 4328 break 4329 4330 if index == self._index: 4331 break 4332 4333 return self.expression(exp.Group, **elements) # type: ignore 4334 4335 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4336 return self.expression( 4337 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4338 ) 4339 4340 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4341 if self._match(TokenType.L_PAREN): 4342 grouping_set = self._parse_csv(self._parse_column) 4343 self._match_r_paren() 4344 return self.expression(exp.Tuple, expressions=grouping_set) 4345 4346 return self._parse_column() 4347 4348 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4349 if not skip_having_token and not self._match(TokenType.HAVING): 4350 return None 4351 return self.expression(exp.Having, 
this=self._parse_assignment()) 4352 4353 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4354 if not self._match(TokenType.QUALIFY): 4355 return None 4356 return self.expression(exp.Qualify, this=self._parse_assignment()) 4357 4358 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4359 if skip_start_token: 4360 start = None 4361 elif self._match(TokenType.START_WITH): 4362 start = self._parse_assignment() 4363 else: 4364 return None 4365 4366 self._match(TokenType.CONNECT_BY) 4367 nocycle = self._match_text_seq("NOCYCLE") 4368 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4369 exp.Prior, this=self._parse_bitwise() 4370 ) 4371 connect = self._parse_assignment() 4372 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4373 4374 if not start and self._match(TokenType.START_WITH): 4375 start = self._parse_assignment() 4376 4377 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4378 4379 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4380 this = self._parse_id_var(any_token=True) 4381 if self._match(TokenType.ALIAS): 4382 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4383 return this 4384 4385 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4386 if self._match_text_seq("INTERPOLATE"): 4387 return self._parse_wrapped_csv(self._parse_name_as_expression) 4388 return None 4389 4390 def _parse_order( 4391 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4392 ) -> t.Optional[exp.Expression]: 4393 siblings = None 4394 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4395 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4396 return this 4397 4398 siblings = True 4399 4400 return self.expression( 4401 exp.Order, 4402 this=this, 4403 expressions=self._parse_csv(self._parse_ordered), 4404 siblings=siblings, 4405 ) 4406 4407 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4408 if not self._match(token): 4409 return None 4410 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4411 4412 def _parse_ordered( 4413 self, parse_method: t.Optional[t.Callable] = None 4414 ) -> t.Optional[exp.Ordered]: 4415 this = parse_method() if parse_method else self._parse_assignment() 4416 if not this: 4417 return None 4418 4419 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4420 this = exp.var("ALL") 4421 4422 asc = self._match(TokenType.ASC) 4423 desc = self._match(TokenType.DESC) or (asc and False) 4424 4425 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4426 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4427 4428 nulls_first = is_nulls_first or False 4429 explicitly_null_ordered = is_nulls_first or is_nulls_last 4430 4431 if ( 4432 not explicitly_null_ordered 4433 and ( 4434 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4435 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4436 ) 4437 and self.dialect.NULL_ORDERING != "nulls_are_last" 4438 ): 4439 nulls_first = True 4440 4441 if self._match_text_seq("WITH", "FILL"): 4442 with_fill = self.expression( 4443 exp.WithFill, 4444 **{ # type: ignore 4445 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4446 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4447 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4448 "interpolate": self._parse_interpolate(), 4449 }, 4450 ) 4451 else: 4452 with_fill = None 4453 
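# Illustrative sketch, not part of the original source: under the default
# NULL_ORDERING of "nulls_are_small", a plain ORDER BY is normalized to
# nulls_first=True while a DESC ordering is not, and an explicit
# NULLS FIRST / NULLS LAST always takes precedence. For example:
#
#   >>> from sqlglot import parse_one
#   >>> parse_one("SELECT * FROM t ORDER BY x").args["order"].expressions[0].args["nulls_first"]
#   True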
4454 return self.expression( 4455 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4456 ) 4457 4458 def _parse_limit_options(self) -> exp.LimitOptions: 4459 percent = self._match(TokenType.PERCENT) 4460 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4461 self._match_text_seq("ONLY") 4462 with_ties = self._match_text_seq("WITH", "TIES") 4463 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4464 4465 def _parse_limit( 4466 self, 4467 this: t.Optional[exp.Expression] = None, 4468 top: bool = False, 4469 skip_limit_token: bool = False, 4470 ) -> t.Optional[exp.Expression]: 4471 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4472 comments = self._prev_comments 4473 if top: 4474 limit_paren = self._match(TokenType.L_PAREN) 4475 expression = self._parse_term() if limit_paren else self._parse_number() 4476 4477 if limit_paren: 4478 self._match_r_paren() 4479 4480 limit_options = self._parse_limit_options() 4481 else: 4482 limit_options = None 4483 expression = self._parse_term() 4484 4485 if self._match(TokenType.COMMA): 4486 offset = expression 4487 expression = self._parse_term() 4488 else: 4489 offset = None 4490 4491 limit_exp = self.expression( 4492 exp.Limit, 4493 this=this, 4494 expression=expression, 4495 offset=offset, 4496 comments=comments, 4497 limit_options=limit_options, 4498 expressions=self._parse_limit_by(), 4499 ) 4500 4501 return limit_exp 4502 4503 if self._match(TokenType.FETCH): 4504 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4505 direction = self._prev.text.upper() if direction else "FIRST" 4506 4507 count = self._parse_field(tokens=self.FETCH_TOKENS) 4508 4509 return self.expression( 4510 exp.Fetch, 4511 direction=direction, 4512 count=count, 4513 limit_options=self._parse_limit_options(), 4514 ) 4515 4516 return this 4517 4518 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4519 if not self._match(TokenType.OFFSET): 4520 return this 4521 4522 count = self._parse_term() 4523 self._match_set((TokenType.ROW, TokenType.ROWS)) 4524 4525 return self.expression( 4526 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4527 ) 4528 4529 def _can_parse_limit_or_offset(self) -> bool: 4530 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4531 return False 4532 4533 index = self._index 4534 result = bool( 4535 self._try_parse(self._parse_limit, retreat=True) 4536 or self._try_parse(self._parse_offset, retreat=True) 4537 ) 4538 self._retreat(index) 4539 return result 4540 4541 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4542 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4543 4544 def _parse_locks(self) -> t.List[exp.Lock]: 4545 locks = [] 4546 while True: 4547 if self._match_text_seq("FOR", "UPDATE"): 4548 update = True 4549 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4550 "LOCK", "IN", "SHARE", "MODE" 4551 ): 4552 update = False 4553 else: 4554 break 4555 4556 expressions = None 4557 if self._match_text_seq("OF"): 4558 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4559 4560 wait: t.Optional[bool | exp.Expression] = None 4561 if self._match_text_seq("NOWAIT"): 4562 wait = True 4563 elif self._match_text_seq("WAIT"): 4564 wait = self._parse_primary() 4565 elif self._match_text_seq("SKIP", "LOCKED"): 4566 wait = False 4567 4568 locks.append( 4569 self.expression(exp.Lock, 
update=update, expressions=expressions, wait=wait) 4570 ) 4571 4572 return locks 4573 4574 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4575 while this and self._match_set(self.SET_OPERATIONS): 4576 token_type = self._prev.token_type 4577 4578 if token_type == TokenType.UNION: 4579 operation: t.Type[exp.SetOperation] = exp.Union 4580 elif token_type == TokenType.EXCEPT: 4581 operation = exp.Except 4582 else: 4583 operation = exp.Intersect 4584 4585 comments = self._prev.comments 4586 4587 if self._match(TokenType.DISTINCT): 4588 distinct: t.Optional[bool] = True 4589 elif self._match(TokenType.ALL): 4590 distinct = False 4591 else: 4592 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4593 if distinct is None: 4594 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4595 4596 by_name = self._match_text_seq("BY", "NAME") 4597 expression = self._parse_select(nested=True, parse_set_operation=False) 4598 4599 this = self.expression( 4600 operation, 4601 comments=comments, 4602 this=this, 4603 distinct=distinct, 4604 by_name=by_name, 4605 expression=expression, 4606 ) 4607 4608 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4609 expression = this.expression 4610 4611 if expression: 4612 for arg in self.SET_OP_MODIFIERS: 4613 expr = expression.args.get(arg) 4614 if expr: 4615 this.set(arg, expr.pop()) 4616 4617 return this 4618 4619 def _parse_expression(self) -> t.Optional[exp.Expression]: 4620 return self._parse_alias(self._parse_assignment()) 4621 4622 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4623 this = self._parse_disjunction() 4624 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4625 # This allows us to parse <non-identifier token> := <expr> 4626 this = exp.column( 4627 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4628 ) 4629 4630 while self._match_set(self.ASSIGNMENT): 4631 if isinstance(this, exp.Column) and len(this.parts) == 1: 4632 this = this.this 4633 4634 this = self.expression( 4635 self.ASSIGNMENT[self._prev.token_type], 4636 this=this, 4637 comments=self._prev_comments, 4638 expression=self._parse_assignment(), 4639 ) 4640 4641 return this 4642 4643 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4644 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4645 4646 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4647 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4648 4649 def _parse_equality(self) -> t.Optional[exp.Expression]: 4650 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4651 4652 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4653 return self._parse_tokens(self._parse_range, self.COMPARISON) 4654 4655 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4656 this = this or self._parse_bitwise() 4657 negate = self._match(TokenType.NOT) 4658 4659 if self._match_set(self.RANGE_PARSERS): 4660 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4661 if not expression: 4662 return this 4663 4664 this = expression 4665 elif self._match(TokenType.ISNULL): 4666 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4667 4668 # Postgres supports ISNULL and NOTNULL for conditions. 
4669 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4670 if self._match(TokenType.NOTNULL): 4671 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4672 this = self.expression(exp.Not, this=this) 4673 4674 if negate: 4675 this = self._negate_range(this) 4676 4677 if self._match(TokenType.IS): 4678 this = self._parse_is(this) 4679 4680 return this 4681 4682 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4683 if not this: 4684 return this 4685 4686 return self.expression(exp.Not, this=this) 4687 4688 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4689 index = self._index - 1 4690 negate = self._match(TokenType.NOT) 4691 4692 if self._match_text_seq("DISTINCT", "FROM"): 4693 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4694 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4695 4696 if self._match(TokenType.JSON): 4697 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4698 4699 if self._match_text_seq("WITH"): 4700 _with = True 4701 elif self._match_text_seq("WITHOUT"): 4702 _with = False 4703 else: 4704 _with = None 4705 4706 unique = self._match(TokenType.UNIQUE) 4707 self._match_text_seq("KEYS") 4708 expression: t.Optional[exp.Expression] = self.expression( 4709 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4710 ) 4711 else: 4712 expression = self._parse_primary() or self._parse_null() 4713 if not expression: 4714 self._retreat(index) 4715 return None 4716 4717 this = self.expression(exp.Is, this=this, expression=expression) 4718 return self.expression(exp.Not, this=this) if negate else this 4719 4720 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4721 unnest = self._parse_unnest(with_alias=False) 4722 if unnest: 4723 this = self.expression(exp.In, this=this, unnest=unnest) 4724 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4725 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4726 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4727 4728 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4729 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4730 else: 4731 this = self.expression(exp.In, this=this, expressions=expressions) 4732 4733 if matched_l_paren: 4734 self._match_r_paren(this) 4735 elif not self._match(TokenType.R_BRACKET, expression=this): 4736 self.raise_error("Expecting ]") 4737 else: 4738 this = self.expression(exp.In, this=this, field=self._parse_column()) 4739 4740 return this 4741 4742 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4743 low = self._parse_bitwise() 4744 self._match(TokenType.AND) 4745 high = self._parse_bitwise() 4746 return self.expression(exp.Between, this=this, low=low, high=high) 4747 4748 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4749 if not self._match(TokenType.ESCAPE): 4750 return this 4751 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4752 4753 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4754 index = self._index 4755 4756 if not self._match(TokenType.INTERVAL) and match_interval: 4757 return None 4758 4759 if self._match(TokenType.STRING, advance=False): 4760 this = self._parse_primary() 4761 else: 4762 this = self._parse_term() 4763 4764 if not 
this or ( 4765 isinstance(this, exp.Column) 4766 and not this.table 4767 and not this.this.quoted 4768 and this.name.upper() == "IS" 4769 ): 4770 self._retreat(index) 4771 return None 4772 4773 unit = self._parse_function() or ( 4774 not self._match(TokenType.ALIAS, advance=False) 4775 and self._parse_var(any_token=True, upper=True) 4776 ) 4777 4778 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4779 # each INTERVAL expression into this canonical form so it's easy to transpile 4780 if this and this.is_number: 4781 this = exp.Literal.string(this.to_py()) 4782 elif this and this.is_string: 4783 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4784 if parts and unit: 4785 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4786 unit = None 4787 self._retreat(self._index - 1) 4788 4789 if len(parts) == 1: 4790 this = exp.Literal.string(parts[0][0]) 4791 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4792 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4793 unit = self.expression( 4794 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4795 ) 4796 4797 interval = self.expression(exp.Interval, this=this, unit=unit) 4798 4799 index = self._index 4800 self._match(TokenType.PLUS) 4801 4802 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4803 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4804 return self.expression( 4805 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4806 ) 4807 4808 self._retreat(index) 4809 return interval 4810 4811 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4812 this = self._parse_term() 4813 4814 while True: 4815 if self._match_set(self.BITWISE): 4816 this = self.expression( 4817 self.BITWISE[self._prev.token_type], 4818 this=this, 4819 expression=self._parse_term(), 4820 ) 4821 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4822 this = self.expression( 4823 exp.DPipe, 4824 this=this, 4825 expression=self._parse_term(), 4826 safe=not self.dialect.STRICT_STRING_CONCAT, 4827 ) 4828 elif self._match(TokenType.DQMARK): 4829 this = self.expression( 4830 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4831 ) 4832 elif self._match_pair(TokenType.LT, TokenType.LT): 4833 this = self.expression( 4834 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4835 ) 4836 elif self._match_pair(TokenType.GT, TokenType.GT): 4837 this = self.expression( 4838 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4839 ) 4840 else: 4841 break 4842 4843 return this 4844 4845 def _parse_term(self) -> t.Optional[exp.Expression]: 4846 this = self._parse_factor() 4847 4848 while self._match_set(self.TERM): 4849 klass = self.TERM[self._prev.token_type] 4850 comments = self._prev_comments 4851 expression = self._parse_factor() 4852 4853 this = self.expression(klass, this=this, comments=comments, expression=expression) 4854 4855 if isinstance(this, exp.Collate): 4856 expr = this.expression 4857 4858 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4859 # fallback to Identifier / Var 4860 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4861 ident = expr.this 4862 if isinstance(ident, exp.Identifier): 4863 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4864 4865 return this 4866 4867 def _parse_factor(self) -> t.Optional[exp.Expression]: 4868 parse_method = 
self._parse_exponent if self.EXPONENT else self._parse_unary 4869 this = parse_method() 4870 4871 while self._match_set(self.FACTOR): 4872 klass = self.FACTOR[self._prev.token_type] 4873 comments = self._prev_comments 4874 expression = parse_method() 4875 4876 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4877 self._retreat(self._index - 1) 4878 return this 4879 4880 this = self.expression(klass, this=this, comments=comments, expression=expression) 4881 4882 if isinstance(this, exp.Div): 4883 this.args["typed"] = self.dialect.TYPED_DIVISION 4884 this.args["safe"] = self.dialect.SAFE_DIVISION 4885 4886 return this 4887 4888 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4889 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4890 4891 def _parse_unary(self) -> t.Optional[exp.Expression]: 4892 if self._match_set(self.UNARY_PARSERS): 4893 return self.UNARY_PARSERS[self._prev.token_type](self) 4894 return self._parse_at_time_zone(self._parse_type()) 4895 4896 def _parse_type( 4897 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4898 ) -> t.Optional[exp.Expression]: 4899 interval = parse_interval and self._parse_interval() 4900 if interval: 4901 return interval 4902 4903 index = self._index 4904 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4905 4906 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4907 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4908 if isinstance(data_type, exp.Cast): 4909 # This constructor can contain ops directly after it, for instance struct unnesting: 4910 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4911 return self._parse_column_ops(data_type) 4912 4913 if data_type: 4914 index2 = self._index 4915 this = self._parse_primary() 4916 4917 if isinstance(this, exp.Literal): 4918 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4919 if parser: 4920 return parser(self, this, data_type) 4921 4922 return self.expression(exp.Cast, this=this, to=data_type) 4923 4924 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4925 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4926 # 4927 # If the index difference here is greater than 1, that means the parser itself must have 4928 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4929 # 4930 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4931 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4932 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4933 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4934 # 4935 # In these cases, we don't really want to return the converted type, but instead retreat 4936 # and try to parse a Column or Identifier in the section below. 
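# Illustrative sketch, not part of the original source: for the literal input
# DECIMAL(38, 0) the parser consumes the tokens `( 38 , 0 )` itself, so
# index2 - index > 1 and the branch below keeps the parsed type, whereas a
# TYPE_CONVERTERS-built DECIMAL(38, 0) leaves the difference at 1 and we
# retreat to re-parse the lone DECIMAL token as a column or identifier.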
4937 if data_type.expressions and index2 - index > 1: 4938 self._retreat(index2) 4939 return self._parse_column_ops(data_type) 4940 4941 self._retreat(index) 4942 4943 if fallback_to_identifier: 4944 return self._parse_id_var() 4945 4946 this = self._parse_column() 4947 return this and self._parse_column_ops(this) 4948 4949 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4950 this = self._parse_type() 4951 if not this: 4952 return None 4953 4954 if isinstance(this, exp.Column) and not this.table: 4955 this = exp.var(this.name.upper()) 4956 4957 return self.expression( 4958 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4959 ) 4960 4961 def _parse_types( 4962 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4963 ) -> t.Optional[exp.Expression]: 4964 index = self._index 4965 4966 this: t.Optional[exp.Expression] = None 4967 prefix = self._match_text_seq("SYSUDTLIB", ".") 4968 4969 if not self._match_set(self.TYPE_TOKENS): 4970 identifier = allow_identifiers and self._parse_id_var( 4971 any_token=False, tokens=(TokenType.VAR,) 4972 ) 4973 if isinstance(identifier, exp.Identifier): 4974 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4975 4976 if len(tokens) != 1: 4977 self.raise_error("Unexpected identifier", self._prev) 4978 4979 if tokens[0].token_type in self.TYPE_TOKENS: 4980 self._prev = tokens[0] 4981 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4982 type_name = identifier.name 4983 4984 while self._match(TokenType.DOT): 4985 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4986 4987 this = exp.DataType.build(type_name, udt=True) 4988 else: 4989 self._retreat(self._index - 1) 4990 return None 4991 else: 4992 return None 4993 4994 type_token = self._prev.token_type 4995 4996 if type_token == TokenType.PSEUDO_TYPE: 4997 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4998 4999 if type_token == TokenType.OBJECT_IDENTIFIER: 5000 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5001 5002 # https://materialize.com/docs/sql/types/map/ 5003 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5004 key_type = self._parse_types( 5005 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5006 ) 5007 if not self._match(TokenType.FARROW): 5008 self._retreat(index) 5009 return None 5010 5011 value_type = self._parse_types( 5012 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5013 ) 5014 if not self._match(TokenType.R_BRACKET): 5015 self._retreat(index) 5016 return None 5017 5018 return exp.DataType( 5019 this=exp.DataType.Type.MAP, 5020 expressions=[key_type, value_type], 5021 nested=True, 5022 prefix=prefix, 5023 ) 5024 5025 nested = type_token in self.NESTED_TYPE_TOKENS 5026 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5027 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5028 expressions = None 5029 maybe_func = False 5030 5031 if self._match(TokenType.L_PAREN): 5032 if is_struct: 5033 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5034 elif nested: 5035 expressions = self._parse_csv( 5036 lambda: self._parse_types( 5037 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5038 ) 5039 ) 5040 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5041 this = expressions[0] 5042 this.set("nullable", True) 5043 self._match_r_paren() 5044 return this 5045 elif type_token in self.ENUM_TYPE_TOKENS: 5046 
expressions = self._parse_csv(self._parse_equality) 5047 elif is_aggregate: 5048 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5049 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5050 ) 5051 if not func_or_ident: 5052 return None 5053 expressions = [func_or_ident] 5054 if self._match(TokenType.COMMA): 5055 expressions.extend( 5056 self._parse_csv( 5057 lambda: self._parse_types( 5058 check_func=check_func, 5059 schema=schema, 5060 allow_identifiers=allow_identifiers, 5061 ) 5062 ) 5063 ) 5064 else: 5065 expressions = self._parse_csv(self._parse_type_size) 5066 5067 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5068 if type_token == TokenType.VECTOR and len(expressions) == 2: 5069 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5070 5071 if not expressions or not self._match(TokenType.R_PAREN): 5072 self._retreat(index) 5073 return None 5074 5075 maybe_func = True 5076 5077 values: t.Optional[t.List[exp.Expression]] = None 5078 5079 if nested and self._match(TokenType.LT): 5080 if is_struct: 5081 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5082 else: 5083 expressions = self._parse_csv( 5084 lambda: self._parse_types( 5085 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5086 ) 5087 ) 5088 5089 if not self._match(TokenType.GT): 5090 self.raise_error("Expecting >") 5091 5092 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5093 values = self._parse_csv(self._parse_assignment) 5094 if not values and is_struct: 5095 values = None 5096 self._retreat(self._index - 1) 5097 else: 5098 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5099 5100 if type_token in self.TIMESTAMPS: 5101 if self._match_text_seq("WITH", "TIME", "ZONE"): 5102 maybe_func = False 5103 tz_type = ( 5104 exp.DataType.Type.TIMETZ 5105 if type_token in self.TIMES 5106 else exp.DataType.Type.TIMESTAMPTZ 5107 ) 5108 this = exp.DataType(this=tz_type, expressions=expressions) 5109 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5110 maybe_func = False 5111 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5112 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5113 maybe_func = False 5114 elif type_token == TokenType.INTERVAL: 5115 unit = self._parse_var(upper=True) 5116 if unit: 5117 if self._match_text_seq("TO"): 5118 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5119 5120 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5121 else: 5122 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5123 5124 if maybe_func and check_func: 5125 index2 = self._index 5126 peek = self._parse_string() 5127 5128 if not peek: 5129 self._retreat(index) 5130 return None 5131 5132 self._retreat(index2) 5133 5134 if not this: 5135 if self._match_text_seq("UNSIGNED"): 5136 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5137 if not unsigned_type_token: 5138 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5139 5140 type_token = unsigned_type_token or type_token 5141 5142 this = exp.DataType( 5143 this=exp.DataType.Type[type_token.value], 5144 expressions=expressions, 5145 nested=nested, 5146 prefix=prefix, 5147 ) 5148 5149 # Empty arrays/structs are allowed 5150 if values is not None: 5151 cls = exp.Struct if is_struct else exp.Array 5152 this = exp.cast(cls(expressions=values), this, copy=False) 5153 5154 elif 
expressions: 5155 this.set("expressions", expressions) 5156 5157 # https://materialize.com/docs/sql/types/list/#type-name 5158 while self._match(TokenType.LIST): 5159 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5160 5161 index = self._index 5162 5163 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5164 matched_array = self._match(TokenType.ARRAY) 5165 5166 while self._curr: 5167 datatype_token = self._prev.token_type 5168 matched_l_bracket = self._match(TokenType.L_BRACKET) 5169 5170 if (not matched_l_bracket and not matched_array) or ( 5171 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5172 ): 5173 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5174 # not to be confused with the fixed size array parsing 5175 break 5176 5177 matched_array = False 5178 values = self._parse_csv(self._parse_assignment) or None 5179 if ( 5180 values 5181 and not schema 5182 and ( 5183 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5184 ) 5185 ): 5186 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5187 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5188 self._retreat(index) 5189 break 5190 5191 this = exp.DataType( 5192 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5193 ) 5194 self._match(TokenType.R_BRACKET) 5195 5196 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5197 converter = self.TYPE_CONVERTERS.get(this.this) 5198 if converter: 5199 this = converter(t.cast(exp.DataType, this)) 5200 5201 return this 5202 5203 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5204 index = self._index 5205 5206 if ( 5207 self._curr 5208 and self._next 5209 and self._curr.token_type in self.TYPE_TOKENS 5210 and self._next.token_type in self.TYPE_TOKENS 5211 ): 5212 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5213 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5214 this = self._parse_id_var() 5215 else: 5216 this = ( 5217 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5218 or self._parse_id_var() 5219 ) 5220 5221 self._match(TokenType.COLON) 5222 5223 if ( 5224 type_required 5225 and not isinstance(this, exp.DataType) 5226 and not self._match_set(self.TYPE_TOKENS, advance=False) 5227 ): 5228 self._retreat(index) 5229 return self._parse_types() 5230 5231 return self._parse_column_def(this) 5232 5233 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5234 if not self._match_text_seq("AT", "TIME", "ZONE"): 5235 return this 5236 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5237 5238 def _parse_column(self) -> t.Optional[exp.Expression]: 5239 this = self._parse_column_reference() 5240 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5241 5242 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5243 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5244 5245 return column 5246 5247 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5248 this = self._parse_field() 5249 if ( 5250 not this 5251 and self._match(TokenType.VALUES, advance=False) 5252 and self.VALUES_FOLLOWED_BY_PAREN 5253 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5254 ): 5255 this = self._parse_id_var() 5256 5257 if isinstance(this, exp.Identifier): 5258 # We bubble up comments from the Identifier to the Column 5259 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5260 5261 return this 5262 5263 def _parse_colon_as_variant_extract( 5264 self, this: t.Optional[exp.Expression] 5265 ) -> t.Optional[exp.Expression]: 5266 casts = [] 5267 json_path = [] 5268 escape = None 5269 5270 while self._match(TokenType.COLON): 5271 start_index = self._index 5272 5273 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5274 path = self._parse_column_ops( 5275 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5276 ) 5277 5278 # The cast :: operator has a lower precedence than the extraction operator :, so 5279 # we rearrange the AST appropriately to avoid casting the JSON path 5280 while isinstance(path, exp.Cast): 5281 casts.append(path.to) 5282 path = path.this 5283 5284 if casts: 5285 dcolon_offset = next( 5286 i 5287 for i, t in enumerate(self._tokens[start_index:]) 5288 if t.token_type == TokenType.DCOLON 5289 ) 5290 end_token = self._tokens[start_index + dcolon_offset - 1] 5291 else: 5292 end_token = self._prev 5293 5294 if path: 5295 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5296 # it'll roundtrip to a string literal in GET_PATH 5297 if isinstance(path, exp.Identifier) and path.quoted: 5298 escape = True 5299 5300 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5301 5302 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5303 # Databricks transforms it back to the colon/dot notation 5304 if json_path: 5305 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5306 5307 if json_path_expr: 5308 json_path_expr.set("escape", escape) 5309 5310 this = self.expression( 5311 exp.JSONExtract, 5312 this=this, 5313 expression=json_path_expr, 5314 variant_extract=True, 5315 ) 5316 5317 while casts: 5318 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5319 5320 return this 5321 5322 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5323 return self._parse_types() 5324 5325 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5326 this = self._parse_bracket(this) 5327 5328 while self._match_set(self.COLUMN_OPERATORS): 5329 op_token = self._prev.token_type 5330 op = self.COLUMN_OPERATORS.get(op_token) 5331 5332 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5333 field = self._parse_dcolon() 5334 if not field: 5335 self.raise_error("Expected type") 5336 elif op and self._curr: 5337 field = self._parse_column_reference() or self._parse_bracket() 5338 else: 5339 field = self._parse_field(any_token=True, anonymous_func=True) 5340 5341 if isinstance(field, (exp.Func, exp.Window)) and this: 5342 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5343 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5344 this = exp.replace_tree( 5345 this, 5346 lambda n: ( 5347 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5348 if n.table 5349 else n.this 5350 ) 5351 if isinstance(n, exp.Column) 5352 else n, 5353 ) 5354 5355 if op: 5356 this = op(self, this, field) 5357 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5358 this = self.expression( 5359 exp.Column, 5360 comments=this.comments, 5361 this=field, 5362 table=this.this, 5363 db=this.args.get("table"), 5364 catalog=this.args.get("db"), 5365 ) 5366 elif isinstance(field, exp.Window): 5367 # Move the exp.Dot's to the window's function 5368 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5369 field.set("this", window_func) 5370 this = field 5371 else: 5372 this = self.expression(exp.Dot, this=this, expression=field) 5373 5374 if field and field.comments: 5375 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5376 5377 this = self._parse_bracket(this) 5378 5379 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5380 5381 def _parse_primary(self) -> t.Optional[exp.Expression]: 5382 if self._match_set(self.PRIMARY_PARSERS): 5383 token_type = self._prev.token_type 5384 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5385 5386 if token_type == TokenType.STRING: 5387 expressions = [primary] 5388 while self._match(TokenType.STRING): 5389 expressions.append(exp.Literal.string(self._prev.text)) 5390 5391 if len(expressions) > 1: 5392 return self.expression(exp.Concat, expressions=expressions) 5393 5394 return primary 5395 5396 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5397 return exp.Literal.number(f"0.{self._prev.text}") 
5398 5399 if self._match(TokenType.L_PAREN): 5400 comments = self._prev_comments 5401 query = self._parse_select() 5402 5403 if query: 5404 expressions = [query] 5405 else: 5406 expressions = self._parse_expressions() 5407 5408 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5409 5410 if not this and self._match(TokenType.R_PAREN, advance=False): 5411 this = self.expression(exp.Tuple) 5412 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5413 this = self._parse_subquery(this=this, parse_alias=False) 5414 elif isinstance(this, exp.Subquery): 5415 this = self._parse_subquery( 5416 this=self._parse_set_operations(this), parse_alias=False 5417 ) 5418 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5419 this = self.expression(exp.Tuple, expressions=expressions) 5420 else: 5421 this = self.expression(exp.Paren, this=this) 5422 5423 if this: 5424 this.add_comments(comments) 5425 5426 self._match_r_paren(expression=this) 5427 return this 5428 5429 return None 5430 5431 def _parse_field( 5432 self, 5433 any_token: bool = False, 5434 tokens: t.Optional[t.Collection[TokenType]] = None, 5435 anonymous_func: bool = False, 5436 ) -> t.Optional[exp.Expression]: 5437 if anonymous_func: 5438 field = ( 5439 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5440 or self._parse_primary() 5441 ) 5442 else: 5443 field = self._parse_primary() or self._parse_function( 5444 anonymous=anonymous_func, any_token=any_token 5445 ) 5446 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5447 5448 def _parse_function( 5449 self, 5450 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5451 anonymous: bool = False, 5452 optional_parens: bool = True, 5453 any_token: bool = False, 5454 ) -> t.Optional[exp.Expression]: 5455 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5456 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5457 fn_syntax = False 5458 if ( 5459 self._match(TokenType.L_BRACE, advance=False) 5460 and self._next 5461 and self._next.text.upper() == "FN" 5462 ): 5463 self._advance(2) 5464 fn_syntax = True 5465 5466 func = self._parse_function_call( 5467 functions=functions, 5468 anonymous=anonymous, 5469 optional_parens=optional_parens, 5470 any_token=any_token, 5471 ) 5472 5473 if fn_syntax: 5474 self._match(TokenType.R_BRACE) 5475 5476 return func 5477 5478 def _parse_function_call( 5479 self, 5480 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5481 anonymous: bool = False, 5482 optional_parens: bool = True, 5483 any_token: bool = False, 5484 ) -> t.Optional[exp.Expression]: 5485 if not self._curr: 5486 return None 5487 5488 comments = self._curr.comments 5489 token_type = self._curr.token_type 5490 this = self._curr.text 5491 upper = this.upper() 5492 5493 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5494 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5495 self._advance() 5496 return self._parse_window(parser(self)) 5497 5498 if not self._next or self._next.token_type != TokenType.L_PAREN: 5499 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5500 self._advance() 5501 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5502 5503 return None 5504 5505 if any_token: 5506 if token_type in self.RESERVED_TOKENS: 5507 return None 5508 elif token_type not in self.FUNC_TOKENS: 5509 return None 5510 5511 self._advance(2) 5512 5513 parser = self.FUNCTION_PARSERS.get(upper) 5514 if parser and not anonymous: 5515 this = 
parser(self) 5516 else: 5517 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5518 5519 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5520 this = self.expression( 5521 subquery_predicate, comments=comments, this=self._parse_select() 5522 ) 5523 self._match_r_paren() 5524 return this 5525 5526 if functions is None: 5527 functions = self.FUNCTIONS 5528 5529 function = functions.get(upper) 5530 known_function = function and not anonymous 5531 5532 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5533 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5534 5535 post_func_comments = self._curr and self._curr.comments 5536 if known_function and post_func_comments: 5537 # If the user-provided comment "/* sqlglot.anonymous */" follows the function 5538 # call, we construct it as exp.Anonymous, even if the function is "known" 5539 if any( 5540 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5541 for comment in post_func_comments 5542 ): 5543 known_function = False 5544 5545 if alias and known_function: 5546 args = self._kv_to_prop_eq(args) 5547 5548 if known_function: 5549 func_builder = t.cast(t.Callable, function) 5550 5551 if "dialect" in func_builder.__code__.co_varnames: 5552 func = func_builder(args, dialect=self.dialect) 5553 else: 5554 func = func_builder(args) 5555 5556 func = self.validate_expression(func, args) 5557 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5558 func.meta["name"] = this 5559 5560 this = func 5561 else: 5562 if token_type == TokenType.IDENTIFIER: 5563 this = exp.Identifier(this=this, quoted=True) 5564 this = self.expression(exp.Anonymous, this=this, expressions=args) 5565 5566 if isinstance(this, exp.Expression): 5567 this.add_comments(comments) 5568 5569 self._match_r_paren(this) 5570 return self._parse_window(this) 5571 5572 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5573 return expression 5574 5575 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5576 transformed = [] 5577 5578 for index, e in enumerate(expressions): 5579 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5580 if isinstance(e, exp.Alias): 5581 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5582 5583 if not isinstance(e, exp.PropertyEQ): 5584 e = self.expression( 5585 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5586 ) 5587 5588 if isinstance(e.this, exp.Column): 5589 e.this.replace(e.this.this) 5590 else: 5591 e = self._to_prop_eq(e, index) 5592 5593 transformed.append(e) 5594 5595 return transformed 5596 5597 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5598 return self._parse_statement() 5599 5600 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5601 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5602 5603 def _parse_user_defined_function( 5604 self, kind: t.Optional[TokenType] = None 5605 ) -> t.Optional[exp.Expression]: 5606 this = self._parse_id_var() 5607 5608 while self._match(TokenType.DOT): 5609 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5610 5611 if not self._match(TokenType.L_PAREN): 5612 return this 5613 5614 expressions = self._parse_csv(self._parse_function_parameter) 5615 self._match_r_paren() 5616 return self.expression( 5617 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5618 ) 5619 5620 def _parse_introducer(self, 
token: Token) -> exp.Introducer | exp.Identifier: 5621 literal = self._parse_primary() 5622 if literal: 5623 return self.expression(exp.Introducer, this=token.text, expression=literal) 5624 5625 return self.expression(exp.Identifier, this=token.text) 5626 5627 def _parse_session_parameter(self) -> exp.SessionParameter: 5628 kind = None 5629 this = self._parse_id_var() or self._parse_primary() 5630 5631 if this and self._match(TokenType.DOT): 5632 kind = this.name 5633 this = self._parse_var() or self._parse_primary() 5634 5635 return self.expression(exp.SessionParameter, this=this, kind=kind) 5636 5637 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5638 return self._parse_id_var() 5639 5640 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5641 index = self._index 5642 5643 if self._match(TokenType.L_PAREN): 5644 expressions = t.cast( 5645 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5646 ) 5647 5648 if not self._match(TokenType.R_PAREN): 5649 self._retreat(index) 5650 else: 5651 expressions = [self._parse_lambda_arg()] 5652 5653 if self._match_set(self.LAMBDAS): 5654 return self.LAMBDAS[self._prev.token_type](self, expressions) 5655 5656 self._retreat(index) 5657 5658 this: t.Optional[exp.Expression] 5659 5660 if self._match(TokenType.DISTINCT): 5661 this = self.expression( 5662 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5663 ) 5664 else: 5665 this = self._parse_select_or_expression(alias=alias) 5666 5667 return self._parse_limit( 5668 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5669 ) 5670 5671 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5672 index = self._index 5673 if not self._match(TokenType.L_PAREN): 5674 return this 5675 5676 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5677 # expr can be of both types 5678 if self._match_set(self.SELECT_START_TOKENS): 5679 self._retreat(index) 5680 return this 5681 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5682 self._match_r_paren() 5683 return self.expression(exp.Schema, this=this, expressions=args) 5684 5685 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5686 return self._parse_column_def(self._parse_field(any_token=True)) 5687 5688 def _parse_column_def( 5689 self, this: t.Optional[exp.Expression], computed_column: bool = True 5690 ) -> t.Optional[exp.Expression]: 5691 # column defs are not really columns, they're identifiers 5692 if isinstance(this, exp.Column): 5693 this = this.this 5694 5695 if not computed_column: 5696 self._match(TokenType.ALIAS) 5697 5698 kind = self._parse_types(schema=True) 5699 5700 if self._match_text_seq("FOR", "ORDINALITY"): 5701 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5702 5703 constraints: t.List[exp.Expression] = [] 5704 5705 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5706 ("ALIAS", "MATERIALIZED") 5707 ): 5708 persisted = self._prev.text.upper() == "MATERIALIZED" 5709 constraint_kind = exp.ComputedColumnConstraint( 5710 this=self._parse_assignment(), 5711 persisted=persisted or self._match_text_seq("PERSISTED"), 5712 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5713 ) 5714 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5715 elif ( 5716 kind 5717 and self._match(TokenType.ALIAS, advance=False) 5718 and ( 5719 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5720 or (self._next and self._next.token_type == TokenType.L_PAREN) 5721 ) 5722 ): 5723 self._advance() 5724 constraints.append( 5725 self.expression( 5726 exp.ColumnConstraint, 5727 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5728 ) 5729 ) 5730 5731 while True: 5732 constraint = self._parse_column_constraint() 5733 if not constraint: 5734 break 5735 constraints.append(constraint) 5736 5737 if not kind and not constraints: 5738 return this 5739 5740 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5741 5742 def _parse_auto_increment( 5743 self, 5744 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5745 start = None 5746 increment = None 5747 5748 if self._match(TokenType.L_PAREN, advance=False): 5749 args = self._parse_wrapped_csv(self._parse_bitwise) 5750 start = seq_get(args, 0) 5751 increment = seq_get(args, 1) 5752 elif self._match_text_seq("START"): 5753 start = self._parse_bitwise() 5754 self._match_text_seq("INCREMENT") 5755 increment = self._parse_bitwise() 5756 5757 if start and increment: 5758 return exp.GeneratedAsIdentityColumnConstraint( 5759 start=start, increment=increment, this=False 5760 ) 5761 5762 return exp.AutoIncrementColumnConstraint() 5763 5764 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5765 if not self._match_text_seq("REFRESH"): 5766 self._retreat(self._index - 1) 5767 return None 5768 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5769 5770 def _parse_compress(self) -> exp.CompressColumnConstraint: 5771 if self._match(TokenType.L_PAREN, advance=False): 5772 return self.expression( 5773 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5774 ) 5775 5776 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5777 5778 def 
_parse_generated_as_identity( 5779 self, 5780 ) -> ( 5781 exp.GeneratedAsIdentityColumnConstraint 5782 | exp.ComputedColumnConstraint 5783 | exp.GeneratedAsRowColumnConstraint 5784 ): 5785 if self._match_text_seq("BY", "DEFAULT"): 5786 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5787 this = self.expression( 5788 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5789 ) 5790 else: 5791 self._match_text_seq("ALWAYS") 5792 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5793 5794 self._match(TokenType.ALIAS) 5795 5796 if self._match_text_seq("ROW"): 5797 start = self._match_text_seq("START") 5798 if not start: 5799 self._match(TokenType.END) 5800 hidden = self._match_text_seq("HIDDEN") 5801 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5802 5803 identity = self._match_text_seq("IDENTITY") 5804 5805 if self._match(TokenType.L_PAREN): 5806 if self._match(TokenType.START_WITH): 5807 this.set("start", self._parse_bitwise()) 5808 if self._match_text_seq("INCREMENT", "BY"): 5809 this.set("increment", self._parse_bitwise()) 5810 if self._match_text_seq("MINVALUE"): 5811 this.set("minvalue", self._parse_bitwise()) 5812 if self._match_text_seq("MAXVALUE"): 5813 this.set("maxvalue", self._parse_bitwise()) 5814 5815 if self._match_text_seq("CYCLE"): 5816 this.set("cycle", True) 5817 elif self._match_text_seq("NO", "CYCLE"): 5818 this.set("cycle", False) 5819 5820 if not identity: 5821 this.set("expression", self._parse_range()) 5822 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5823 args = self._parse_csv(self._parse_bitwise) 5824 this.set("start", seq_get(args, 0)) 5825 this.set("increment", seq_get(args, 1)) 5826 5827 self._match_r_paren() 5828 5829 return this 5830 5831 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5832 self._match_text_seq("LENGTH") 5833 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5834 5835 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5836 if self._match_text_seq("NULL"): 5837 return self.expression(exp.NotNullColumnConstraint) 5838 if self._match_text_seq("CASESPECIFIC"): 5839 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5840 if self._match_text_seq("FOR", "REPLICATION"): 5841 return self.expression(exp.NotForReplicationColumnConstraint) 5842 5843 # Unconsume the `NOT` token 5844 self._retreat(self._index - 1) 5845 return None 5846 5847 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5848 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5849 5850 procedure_option_follows = ( 5851 self._match(TokenType.WITH, advance=False) 5852 and self._next 5853 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5854 ) 5855 5856 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5857 return self.expression( 5858 exp.ColumnConstraint, 5859 this=this, 5860 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5861 ) 5862 5863 return this 5864 5865 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5866 if not self._match(TokenType.CONSTRAINT): 5867 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5868 5869 return self.expression( 5870 exp.Constraint, 5871 this=self._parse_id_var(), 5872 expressions=self._parse_unnamed_constraints(), 5873 ) 5874 5875 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5876 constraints = [] 5877 while True: 5878 
constraint = self._parse_unnamed_constraint() or self._parse_function() 5879 if not constraint: 5880 break 5881 constraints.append(constraint) 5882 5883 return constraints 5884 5885 def _parse_unnamed_constraint( 5886 self, constraints: t.Optional[t.Collection[str]] = None 5887 ) -> t.Optional[exp.Expression]: 5888 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5889 constraints or self.CONSTRAINT_PARSERS 5890 ): 5891 return None 5892 5893 constraint = self._prev.text.upper() 5894 if constraint not in self.CONSTRAINT_PARSERS: 5895 self.raise_error(f"No parser found for schema constraint {constraint}.") 5896 5897 return self.CONSTRAINT_PARSERS[constraint](self) 5898 5899 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5900 return self._parse_id_var(any_token=False) 5901 5902 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5903 self._match_text_seq("KEY") 5904 return self.expression( 5905 exp.UniqueColumnConstraint, 5906 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5907 this=self._parse_schema(self._parse_unique_key()), 5908 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5909 on_conflict=self._parse_on_conflict(), 5910 ) 5911 5912 def _parse_key_constraint_options(self) -> t.List[str]: 5913 options = [] 5914 while True: 5915 if not self._curr: 5916 break 5917 5918 if self._match(TokenType.ON): 5919 action = None 5920 on = self._advance_any() and self._prev.text 5921 5922 if self._match_text_seq("NO", "ACTION"): 5923 action = "NO ACTION" 5924 elif self._match_text_seq("CASCADE"): 5925 action = "CASCADE" 5926 elif self._match_text_seq("RESTRICT"): 5927 action = "RESTRICT" 5928 elif self._match_pair(TokenType.SET, TokenType.NULL): 5929 action = "SET NULL" 5930 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5931 action = "SET DEFAULT" 5932 else: 5933 self.raise_error("Invalid key constraint") 5934 5935 options.append(f"ON {on} {action}") 5936 else: 5937 var = self._parse_var_from_options( 5938 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5939 ) 5940 if not var: 5941 break 5942 options.append(var.name) 5943 5944 return options 5945 5946 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5947 if match and not self._match(TokenType.REFERENCES): 5948 return None 5949 5950 expressions = None 5951 this = self._parse_table(schema=True) 5952 options = self._parse_key_constraint_options() 5953 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5954 5955 def _parse_foreign_key(self) -> exp.ForeignKey: 5956 expressions = self._parse_wrapped_id_vars() 5957 reference = self._parse_references() 5958 options = {} 5959 5960 while self._match(TokenType.ON): 5961 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5962 self.raise_error("Expected DELETE or UPDATE") 5963 5964 kind = self._prev.text.lower() 5965 5966 if self._match_text_seq("NO", "ACTION"): 5967 action = "NO ACTION" 5968 elif self._match(TokenType.SET): 5969 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5970 action = "SET " + self._prev.text.upper() 5971 else: 5972 self._advance() 5973 action = self._prev.text.upper() 5974 5975 options[kind] = action 5976 5977 return self.expression( 5978 exp.ForeignKey, 5979 expressions=expressions, 5980 reference=reference, 5981 **options, # type: ignore 5982 ) 5983 5984 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5985 return self._parse_ordered() or self._parse_field() 5986 5987 def 
_parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5988 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5989 self._retreat(self._index - 1) 5990 return None 5991 5992 id_vars = self._parse_wrapped_id_vars() 5993 return self.expression( 5994 exp.PeriodForSystemTimeConstraint, 5995 this=seq_get(id_vars, 0), 5996 expression=seq_get(id_vars, 1), 5997 ) 5998 5999 def _parse_primary_key( 6000 self, wrapped_optional: bool = False, in_props: bool = False 6001 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6002 desc = ( 6003 self._match_set((TokenType.ASC, TokenType.DESC)) 6004 and self._prev.token_type == TokenType.DESC 6005 ) 6006 6007 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6008 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 6009 6010 expressions = self._parse_wrapped_csv( 6011 self._parse_primary_key_part, optional=wrapped_optional 6012 ) 6013 options = self._parse_key_constraint_options() 6014 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6015 6016 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6017 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6018 6019 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6020 """ 6021 Parses a datetime column in ODBC format. We parse the column into the corresponding 6022 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6023 same as we did for `DATE('yyyy-mm-dd')`. 6024 6025 Reference: 6026 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6027 """ 6028 self._match(TokenType.VAR) 6029 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6030 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6031 if not self._match(TokenType.R_BRACE): 6032 self.raise_error("Expected }") 6033 return expression 6034 6035 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6036 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6037 return this 6038 6039 bracket_kind = self._prev.token_type 6040 if ( 6041 bracket_kind == TokenType.L_BRACE 6042 and self._curr 6043 and self._curr.token_type == TokenType.VAR 6044 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6045 ): 6046 return self._parse_odbc_datetime_literal() 6047 6048 expressions = self._parse_csv( 6049 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6050 ) 6051 6052 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6053 self.raise_error("Expected ]") 6054 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6055 self.raise_error("Expected }") 6056 6057 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6058 if bracket_kind == TokenType.L_BRACE: 6059 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6060 elif not this: 6061 this = build_array_constructor( 6062 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6063 ) 6064 else: 6065 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6066 if constructor_type: 6067 return build_array_constructor( 6068 constructor_type, 6069 args=expressions, 6070 bracket_kind=bracket_kind, 6071 dialect=self.dialect, 6072 ) 6073 6074 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 6075 
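# Editor's note (assumption, not in the original source): apply_index_offset shifts literal
# subscripts by -INDEX_OFFSET, so bracket indexes are presumably stored zero-based internally
# and re-offset at generation time; for a 1-indexed dialect such as DuckDB this would mean:
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT x[1]", read="duckdb", write="spark")[0]  # expected: 'SELECT x[0]'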
this = self.expression(exp.Bracket, this=this, expressions=expressions) 6076 6077 self._add_comments(this) 6078 return self._parse_bracket(this) 6079 6080 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6081 if self._match(TokenType.COLON): 6082 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6083 return this 6084 6085 def _parse_case(self) -> t.Optional[exp.Expression]: 6086 ifs = [] 6087 default = None 6088 6089 comments = self._prev_comments 6090 expression = self._parse_assignment() 6091 6092 while self._match(TokenType.WHEN): 6093 this = self._parse_assignment() 6094 self._match(TokenType.THEN) 6095 then = self._parse_assignment() 6096 ifs.append(self.expression(exp.If, this=this, true=then)) 6097 6098 if self._match(TokenType.ELSE): 6099 default = self._parse_assignment() 6100 6101 if not self._match(TokenType.END): 6102 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6103 default = exp.column("interval") 6104 else: 6105 self.raise_error("Expected END after CASE", self._prev) 6106 6107 return self.expression( 6108 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6109 ) 6110 6111 def _parse_if(self) -> t.Optional[exp.Expression]: 6112 if self._match(TokenType.L_PAREN): 6113 args = self._parse_csv(self._parse_assignment) 6114 this = self.validate_expression(exp.If.from_arg_list(args), args) 6115 self._match_r_paren() 6116 else: 6117 index = self._index - 1 6118 6119 if self.NO_PAREN_IF_COMMANDS and index == 0: 6120 return self._parse_as_command(self._prev) 6121 6122 condition = self._parse_assignment() 6123 6124 if not condition: 6125 self._retreat(index) 6126 return None 6127 6128 self._match(TokenType.THEN) 6129 true = self._parse_assignment() 6130 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6131 self._match(TokenType.END) 6132 this = self.expression(exp.If, this=condition, true=true, false=false) 6133 6134 return this 6135 6136 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6137 if not self._match_text_seq("VALUE", "FOR"): 6138 self._retreat(self._index - 1) 6139 return None 6140 6141 return self.expression( 6142 exp.NextValueFor, 6143 this=self._parse_column(), 6144 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6145 ) 6146 6147 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6148 this = self._parse_function() or self._parse_var_or_string(upper=True) 6149 6150 if self._match(TokenType.FROM): 6151 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6152 6153 if not self._match(TokenType.COMMA): 6154 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6155 6156 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6157 6158 def _parse_gap_fill(self) -> exp.GapFill: 6159 self._match(TokenType.TABLE) 6160 this = self._parse_table() 6161 6162 self._match(TokenType.COMMA) 6163 args = [this, *self._parse_csv(self._parse_lambda)] 6164 6165 gap_fill = exp.GapFill.from_arg_list(args) 6166 return self.validate_expression(gap_fill, args) 6167 6168 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6169 this = self._parse_assignment() 6170 6171 if not self._match(TokenType.ALIAS): 6172 if self._match(TokenType.COMMA): 6173 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6174 6175 self.raise_error("Expected AS after CAST") 6176 6177 fmt = None 6178 to 
= self._parse_types() 6179 6180 default = self._match(TokenType.DEFAULT) 6181 if default: 6182 default = self._parse_bitwise() 6183 self._match_text_seq("ON", "CONVERSION", "ERROR") 6184 6185 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6186 fmt_string = self._parse_string() 6187 fmt = self._parse_at_time_zone(fmt_string) 6188 6189 if not to: 6190 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6191 if to.this in exp.DataType.TEMPORAL_TYPES: 6192 this = self.expression( 6193 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6194 this=this, 6195 format=exp.Literal.string( 6196 format_time( 6197 fmt_string.this if fmt_string else "", 6198 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6199 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6200 ) 6201 ), 6202 safe=safe, 6203 ) 6204 6205 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6206 this.set("zone", fmt.args["zone"]) 6207 return this 6208 elif not to: 6209 self.raise_error("Expected TYPE after CAST") 6210 elif isinstance(to, exp.Identifier): 6211 to = exp.DataType.build(to.name, udt=True) 6212 elif to.this == exp.DataType.Type.CHAR: 6213 if self._match(TokenType.CHARACTER_SET): 6214 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6215 6216 return self.expression( 6217 exp.Cast if strict else exp.TryCast, 6218 this=this, 6219 to=to, 6220 format=fmt, 6221 safe=safe, 6222 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6223 default=default, 6224 ) 6225 6226 def _parse_string_agg(self) -> exp.GroupConcat: 6227 if self._match(TokenType.DISTINCT): 6228 args: t.List[t.Optional[exp.Expression]] = [ 6229 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6230 ] 6231 if self._match(TokenType.COMMA): 6232 args.extend(self._parse_csv(self._parse_assignment)) 6233 else: 6234 args = self._parse_csv(self._parse_assignment) # type: ignore 6235 6236 if self._match_text_seq("ON", "OVERFLOW"): 6237 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6238 if self._match_text_seq("ERROR"): 6239 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6240 else: 6241 self._match_text_seq("TRUNCATE") 6242 on_overflow = self.expression( 6243 exp.OverflowTruncateBehavior, 6244 this=self._parse_string(), 6245 with_count=( 6246 self._match_text_seq("WITH", "COUNT") 6247 or not self._match_text_seq("WITHOUT", "COUNT") 6248 ), 6249 ) 6250 else: 6251 on_overflow = None 6252 6253 index = self._index 6254 if not self._match(TokenType.R_PAREN) and args: 6255 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6256 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6257 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6258 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6259 6260 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6261 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6262 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
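# Editor's sketch of the intended effect (assumed inputs): Postgres' STRING_AGG(x, ',' ORDER BY y)
# and Trino's LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) should both land on exp.GroupConcat,
# so either form can then be re-emitted as e.g. MySQL's GROUP_CONCAT(x ORDER BY y SEPARATOR ',').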
6263 if not self._match_text_seq("WITHIN", "GROUP"): 6264 self._retreat(index) 6265 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6266 6267 # The corresponding match_r_paren will be called in parse_function (caller) 6268 self._match_l_paren() 6269 6270 return self.expression( 6271 exp.GroupConcat, 6272 this=self._parse_order(this=seq_get(args, 0)), 6273 separator=seq_get(args, 1), 6274 on_overflow=on_overflow, 6275 ) 6276 6277 def _parse_convert( 6278 self, strict: bool, safe: t.Optional[bool] = None 6279 ) -> t.Optional[exp.Expression]: 6280 this = self._parse_bitwise() 6281 6282 if self._match(TokenType.USING): 6283 to: t.Optional[exp.Expression] = self.expression( 6284 exp.CharacterSet, this=self._parse_var() 6285 ) 6286 elif self._match(TokenType.COMMA): 6287 to = self._parse_types() 6288 else: 6289 to = None 6290 6291 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6292 6293 def _parse_xml_table(self) -> exp.XMLTable: 6294 namespaces = None 6295 passing = None 6296 columns = None 6297 6298 if self._match_text_seq("XMLNAMESPACES", "("): 6299 namespaces = self._parse_xml_namespace() 6300 self._match_text_seq(")", ",") 6301 6302 this = self._parse_string() 6303 6304 if self._match_text_seq("PASSING"): 6305 # The BY VALUE keywords are optional and are provided for semantic clarity 6306 self._match_text_seq("BY", "VALUE") 6307 passing = self._parse_csv(self._parse_column) 6308 6309 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6310 6311 if self._match_text_seq("COLUMNS"): 6312 columns = self._parse_csv(self._parse_field_def) 6313 6314 return self.expression( 6315 exp.XMLTable, 6316 this=this, 6317 namespaces=namespaces, 6318 passing=passing, 6319 columns=columns, 6320 by_ref=by_ref, 6321 ) 6322 6323 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6324 namespaces = [] 6325 6326 while True: 6327 if self._match(TokenType.DEFAULT): 6328 uri = self._parse_string() 6329 else: 6330 uri = self._parse_alias(self._parse_string()) 6331 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6332 if not self._match(TokenType.COMMA): 6333 break 6334 6335 return namespaces 6336 6337 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6338 """ 6339 There are generally two variants of the DECODE function: 6340 6341 - DECODE(bin, charset) 6342 - DECODE(expression, search, result [, search, result] ... [, default]) 6343 6344 The second variant will always be parsed into a CASE expression. Note that NULL 6345 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6346 instead of relying on pattern matching. 
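Example (editorial sketch, not in the original docstring):
    DECODE(x, 1, 'one', NULL, 'missing', 'other') is parsed roughly as
    CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'missing' ELSE 'other' END,
    the trailing 'other' becoming the CASE default since the search/result
    arguments come in an odd number.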
6347 """ 6348 args = self._parse_csv(self._parse_assignment) 6349 6350 if len(args) < 3: 6351 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6352 6353 expression, *expressions = args 6354 if not expression: 6355 return None 6356 6357 ifs = [] 6358 for search, result in zip(expressions[::2], expressions[1::2]): 6359 if not search or not result: 6360 return None 6361 6362 if isinstance(search, exp.Literal): 6363 ifs.append( 6364 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6365 ) 6366 elif isinstance(search, exp.Null): 6367 ifs.append( 6368 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6369 ) 6370 else: 6371 cond = exp.or_( 6372 exp.EQ(this=expression.copy(), expression=search), 6373 exp.and_( 6374 exp.Is(this=expression.copy(), expression=exp.Null()), 6375 exp.Is(this=search.copy(), expression=exp.Null()), 6376 copy=False, 6377 ), 6378 copy=False, 6379 ) 6380 ifs.append(exp.If(this=cond, true=result)) 6381 6382 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6383 6384 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6385 self._match_text_seq("KEY") 6386 key = self._parse_column() 6387 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6388 self._match_text_seq("VALUE") 6389 value = self._parse_bitwise() 6390 6391 if not key and not value: 6392 return None 6393 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6394 6395 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6396 if not this or not self._match_text_seq("FORMAT", "JSON"): 6397 return this 6398 6399 return self.expression(exp.FormatJson, this=this) 6400 6401 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6402 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6403 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6404 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6405 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6406 else: 6407 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6408 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6409 6410 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6411 6412 if not empty and not error and not null: 6413 return None 6414 6415 return self.expression( 6416 exp.OnCondition, 6417 empty=empty, 6418 error=error, 6419 null=null, 6420 ) 6421 6422 def _parse_on_handling( 6423 self, on: str, *values: str 6424 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6425 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6426 for value in values: 6427 if self._match_text_seq(value, "ON", on): 6428 return f"{value} ON {on}" 6429 6430 index = self._index 6431 if self._match(TokenType.DEFAULT): 6432 default_value = self._parse_bitwise() 6433 if self._match_text_seq("ON", on): 6434 return default_value 6435 6436 self._retreat(index) 6437 6438 return None 6439 6440 @t.overload 6441 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6442 6443 @t.overload 6444 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6445 6446 def _parse_json_object(self, agg=False): 6447 star = self._parse_star() 6448 expressions = ( 6449 [star] 6450 if star 6451 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6452 ) 6453 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6454 6455 unique_keys = None 6456 if self._match_text_seq("WITH", "UNIQUE"): 6457 unique_keys = True 6458 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6459 unique_keys = False 6460 6461 self._match_text_seq("KEYS") 6462 6463 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6464 self._parse_type() 6465 ) 6466 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6467 6468 return self.expression( 6469 exp.JSONObjectAgg if agg else exp.JSONObject, 6470 expressions=expressions, 6471 null_handling=null_handling, 6472 unique_keys=unique_keys, 6473 return_type=return_type, 6474 encoding=encoding, 6475 ) 6476 6477 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6478 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6479 if not self._match_text_seq("NESTED"): 6480 this = self._parse_id_var() 6481 kind = self._parse_types(allow_identifiers=False) 6482 nested = None 6483 else: 6484 this = None 6485 kind = None 6486 nested = True 6487 6488 path = self._match_text_seq("PATH") and self._parse_string() 6489 nested_schema = nested and self._parse_json_schema() 6490 6491 return self.expression( 6492 exp.JSONColumnDef, 6493 this=this, 6494 kind=kind, 6495 path=path, 6496 nested_schema=nested_schema, 6497 ) 6498 6499 def _parse_json_schema(self) -> exp.JSONSchema: 6500 self._match_text_seq("COLUMNS") 6501 return self.expression( 6502 exp.JSONSchema, 6503 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6504 ) 6505 6506 def _parse_json_table(self) -> exp.JSONTable: 6507 this = self._parse_format_json(self._parse_bitwise()) 6508 path = self._match(TokenType.COMMA) and self._parse_string() 6509 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6510 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6511 schema = self._parse_json_schema() 6512 6513 return exp.JSONTable( 6514 this=this, 6515 schema=schema, 6516 path=path, 6517 error_handling=error_handling, 6518 empty_handling=empty_handling, 6519 ) 6520 6521 def _parse_match_against(self) -> exp.MatchAgainst: 6522 expressions = self._parse_csv(self._parse_column) 6523 6524 self._match_text_seq(")", "AGAINST", "(") 6525 6526 this = self._parse_string() 6527 6528 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6529 modifier = "IN NATURAL LANGUAGE MODE" 6530 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6531 modifier = f"{modifier} WITH QUERY EXPANSION" 6532 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6533 modifier = "IN BOOLEAN MODE" 6534 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6535 modifier = "WITH QUERY EXPANSION" 6536 else: 6537 modifier = None 6538 6539 return self.expression( 6540 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6541 ) 6542 6543 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6544 def _parse_open_json(self) -> exp.OpenJSON: 6545 this = self._parse_bitwise() 6546 path = self._match(TokenType.COMMA) and self._parse_string() 6547 6548 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6549 this = self._parse_field(any_token=True) 6550 kind = self._parse_types() 6551 path = 
self._parse_string() 6552 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6553 6554 return self.expression( 6555 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6556 ) 6557 6558 expressions = None 6559 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6560 self._match_l_paren() 6561 expressions = self._parse_csv(_parse_open_json_column_def) 6562 6563 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6564 6565 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6566 args = self._parse_csv(self._parse_bitwise) 6567 6568 if self._match(TokenType.IN): 6569 return self.expression( 6570 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6571 ) 6572 6573 if haystack_first: 6574 haystack = seq_get(args, 0) 6575 needle = seq_get(args, 1) 6576 else: 6577 haystack = seq_get(args, 1) 6578 needle = seq_get(args, 0) 6579 6580 return self.expression( 6581 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6582 ) 6583 6584 def _parse_predict(self) -> exp.Predict: 6585 self._match_text_seq("MODEL") 6586 this = self._parse_table() 6587 6588 self._match(TokenType.COMMA) 6589 self._match_text_seq("TABLE") 6590 6591 return self.expression( 6592 exp.Predict, 6593 this=this, 6594 expression=self._parse_table(), 6595 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6596 ) 6597 6598 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6599 args = self._parse_csv(self._parse_table) 6600 return exp.JoinHint(this=func_name.upper(), expressions=args) 6601 6602 def _parse_substring(self) -> exp.Substring: 6603 # Postgres supports the form: substring(string [from int] [for int]) 6604 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6605 6606 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6607 6608 if self._match(TokenType.FROM): 6609 args.append(self._parse_bitwise()) 6610 if self._match(TokenType.FOR): 6611 if len(args) == 1: 6612 args.append(exp.Literal.number(1)) 6613 args.append(self._parse_bitwise()) 6614 6615 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6616 6617 def _parse_trim(self) -> exp.Trim: 6618 # https://www.w3resource.com/sql/character-functions/trim.php 6619 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6620 6621 position = None 6622 collation = None 6623 expression = None 6624 6625 if self._match_texts(self.TRIM_TYPES): 6626 position = self._prev.text.upper() 6627 6628 this = self._parse_bitwise() 6629 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6630 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6631 expression = self._parse_bitwise() 6632 6633 if invert_order: 6634 this, expression = expression, this 6635 6636 if self._match(TokenType.COLLATE): 6637 collation = self._parse_bitwise() 6638 6639 return self.expression( 6640 exp.Trim, this=this, position=position, expression=expression, collation=collation 6641 ) 6642 6643 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6644 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6645 6646 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6647 return self._parse_window(self._parse_id_var(), alias=True) 6648 6649 def _parse_respect_or_ignore_nulls( 6650 self, this: t.Optional[exp.Expression] 6651 ) -> t.Optional[exp.Expression]: 6652 if self._match_text_seq("IGNORE", "NULLS"): 
6653 return self.expression(exp.IgnoreNulls, this=this) 6654 if self._match_text_seq("RESPECT", "NULLS"): 6655 return self.expression(exp.RespectNulls, this=this) 6656 return this 6657 6658 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6659 if self._match(TokenType.HAVING): 6660 self._match_texts(("MAX", "MIN")) 6661 max = self._prev.text.upper() != "MIN" 6662 return self.expression( 6663 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6664 ) 6665 6666 return this 6667 6668 def _parse_window( 6669 self, this: t.Optional[exp.Expression], alias: bool = False 6670 ) -> t.Optional[exp.Expression]: 6671 func = this 6672 comments = func.comments if isinstance(func, exp.Expression) else None 6673 6674 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6675 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6676 if self._match_text_seq("WITHIN", "GROUP"): 6677 order = self._parse_wrapped(self._parse_order) 6678 this = self.expression(exp.WithinGroup, this=this, expression=order) 6679 6680 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6681 self._match(TokenType.WHERE) 6682 this = self.expression( 6683 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6684 ) 6685 self._match_r_paren() 6686 6687 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6688 # Some dialects choose to implement it and some do not. 6689 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6690 6691 # There is some code above in _parse_lambda that handles 6692 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6693 6694 # The code below handles 6695 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6696 6697 # Oracle allows both formats 6698 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6699 # and Snowflake chose to do the same for familiarity 6700 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6701 if isinstance(this, exp.AggFunc): 6702 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6703 6704 if ignore_respect and ignore_respect is not this: 6705 ignore_respect.replace(ignore_respect.this) 6706 this = self.expression(ignore_respect.__class__, this=this) 6707 6708 this = self._parse_respect_or_ignore_nulls(this) 6709 6710 # bigquery select from window x AS (partition by ...)
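# Editor's note (assumed example): for
#   SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)
# _parse_named_window calls this method with alias=True, so the branch below consumes
# the AS token instead of requiring one of the WINDOW_BEFORE_PAREN_TOKENS (e.g. OVER).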
6711 if alias: 6712 over = None 6713 self._match(TokenType.ALIAS) 6714 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6715 return this 6716 else: 6717 over = self._prev.text.upper() 6718 6719 if comments and isinstance(func, exp.Expression): 6720 func.pop_comments() 6721 6722 if not self._match(TokenType.L_PAREN): 6723 return self.expression( 6724 exp.Window, 6725 comments=comments, 6726 this=this, 6727 alias=self._parse_id_var(False), 6728 over=over, 6729 ) 6730 6731 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6732 6733 first = self._match(TokenType.FIRST) 6734 if self._match_text_seq("LAST"): 6735 first = False 6736 6737 partition, order = self._parse_partition_and_order() 6738 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6739 6740 if kind: 6741 self._match(TokenType.BETWEEN) 6742 start = self._parse_window_spec() 6743 self._match(TokenType.AND) 6744 end = self._parse_window_spec() 6745 6746 spec = self.expression( 6747 exp.WindowSpec, 6748 kind=kind, 6749 start=start["value"], 6750 start_side=start["side"], 6751 end=end["value"], 6752 end_side=end["side"], 6753 ) 6754 else: 6755 spec = None 6756 6757 self._match_r_paren() 6758 6759 window = self.expression( 6760 exp.Window, 6761 comments=comments, 6762 this=this, 6763 partition_by=partition, 6764 order=order, 6765 spec=spec, 6766 alias=window_alias, 6767 over=over, 6768 first=first, 6769 ) 6770 6771 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6772 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6773 return self._parse_window(window, alias=alias) 6774 6775 return window 6776 6777 def _parse_partition_and_order( 6778 self, 6779 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6780 return self._parse_partition_by(), self._parse_order() 6781 6782 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6783 self._match(TokenType.BETWEEN) 6784 6785 return { 6786 "value": ( 6787 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6788 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6789 or self._parse_bitwise() 6790 ), 6791 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6792 } 6793 6794 def _parse_alias( 6795 self, this: t.Optional[exp.Expression], explicit: bool = False 6796 ) -> t.Optional[exp.Expression]: 6797 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6798 # so this section tries to parse the clause version and if it fails, it treats the token 6799 # as an identifier (alias) 6800 if self._can_parse_limit_or_offset(): 6801 return this 6802 6803 any_token = self._match(TokenType.ALIAS) 6804 comments = self._prev_comments or [] 6805 6806 if explicit and not any_token: 6807 return this 6808 6809 if self._match(TokenType.L_PAREN): 6810 aliases = self.expression( 6811 exp.Aliases, 6812 comments=comments, 6813 this=this, 6814 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6815 ) 6816 self._match_r_paren(aliases) 6817 return aliases 6818 6819 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6820 self.STRING_ALIASES and self._parse_string_as_identifier() 6821 ) 6822 6823 if alias: 6824 comments.extend(alias.pop_comments()) 6825 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6826 column = this.this 6827 6828 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6829 if not this.comments and column and 
column.comments: 6830 this.comments = column.pop_comments() 6831 6832 return this 6833 6834 def _parse_id_var( 6835 self, 6836 any_token: bool = True, 6837 tokens: t.Optional[t.Collection[TokenType]] = None, 6838 ) -> t.Optional[exp.Expression]: 6839 expression = self._parse_identifier() 6840 if not expression and ( 6841 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6842 ): 6843 quoted = self._prev.token_type == TokenType.STRING 6844 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6845 6846 return expression 6847 6848 def _parse_string(self) -> t.Optional[exp.Expression]: 6849 if self._match_set(self.STRING_PARSERS): 6850 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6851 return self._parse_placeholder() 6852 6853 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6854 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6855 6856 def _parse_number(self) -> t.Optional[exp.Expression]: 6857 if self._match_set(self.NUMERIC_PARSERS): 6858 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6859 return self._parse_placeholder() 6860 6861 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6862 if self._match(TokenType.IDENTIFIER): 6863 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6864 return self._parse_placeholder() 6865 6866 def _parse_var( 6867 self, 6868 any_token: bool = False, 6869 tokens: t.Optional[t.Collection[TokenType]] = None, 6870 upper: bool = False, 6871 ) -> t.Optional[exp.Expression]: 6872 if ( 6873 (any_token and self._advance_any()) 6874 or self._match(TokenType.VAR) 6875 or (self._match_set(tokens) if tokens else False) 6876 ): 6877 return self.expression( 6878 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6879 ) 6880 return self._parse_placeholder() 6881 6882 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6883 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6884 self._advance() 6885 return self._prev 6886 return None 6887 6888 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6889 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6890 6891 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6892 return self._parse_primary() or self._parse_var(any_token=True) 6893 6894 def _parse_null(self) -> t.Optional[exp.Expression]: 6895 if self._match_set(self.NULL_TOKENS): 6896 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6897 return self._parse_placeholder() 6898 6899 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6900 if self._match(TokenType.TRUE): 6901 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6902 if self._match(TokenType.FALSE): 6903 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6904 return self._parse_placeholder() 6905 6906 def _parse_star(self) -> t.Optional[exp.Expression]: 6907 if self._match(TokenType.STAR): 6908 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6909 return self._parse_placeholder() 6910 6911 def _parse_parameter(self) -> exp.Parameter: 6912 this = self._parse_identifier() or self._parse_primary_or_var() 6913 return self.expression(exp.Parameter, this=this) 6914 6915 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6916 if self._match_set(self.PLACEHOLDER_PARSERS): 6917 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6918 if placeholder: 6919 return placeholder 6920 self._advance(-1) 6921 return None 6922 6923 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6924 if not self._match_texts(keywords): 6925 return None 6926 if self._match(TokenType.L_PAREN, advance=False): 6927 return self._parse_wrapped_csv(self._parse_expression) 6928 6929 expression = self._parse_expression() 6930 return [expression] if expression else None 6931 6932 def _parse_csv( 6933 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6934 ) -> t.List[exp.Expression]: 6935 parse_result = parse_method() 6936 items = [parse_result] if parse_result is not None else [] 6937 6938 while self._match(sep): 6939 self._add_comments(parse_result) 6940 parse_result = parse_method() 6941 if parse_result is not None: 6942 items.append(parse_result) 6943 6944 return items 6945 6946 def _parse_tokens( 6947 self, parse_method: t.Callable, expressions: t.Dict 6948 ) -> t.Optional[exp.Expression]: 6949 this = parse_method() 6950 6951 while self._match_set(expressions): 6952 this = self.expression( 6953 expressions[self._prev.token_type], 6954 this=this, 6955 comments=self._prev_comments, 6956 expression=parse_method(), 6957 ) 6958 6959 return this 6960 6961 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6962 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6963 6964 def _parse_wrapped_csv( 6965 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6966 ) -> t.List[exp.Expression]: 6967 return self._parse_wrapped( 6968 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6969 ) 6970 6971 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6972 wrapped = self._match(TokenType.L_PAREN) 6973 if not wrapped and not optional: 6974 self.raise_error("Expecting (") 6975 parse_result = parse_method() 6976 if wrapped: 6977 self._match_r_paren() 6978 return parse_result 6979 6980 def _parse_expressions(self) -> t.List[exp.Expression]: 6981 return self._parse_csv(self._parse_expression) 6982 6983 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6984 return self._parse_select() or self._parse_set_operations( 6985 self._parse_alias(self._parse_assignment(), explicit=True) 6986 if alias 6987 else self._parse_assignment() 6988 ) 6989 6990 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6991 return self._parse_query_modifiers( 6992 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6993 ) 6994 6995 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6996 this = None 6997 if self._match_texts(self.TRANSACTION_KIND): 6998 this = self._prev.text 6999 7000 self._match_texts(("TRANSACTION", "WORK")) 7001 7002 modes = [] 7003 while True: 7004 mode = [] 7005 while self._match(TokenType.VAR): 7006 mode.append(self._prev.text) 7007 7008 if mode: 7009 modes.append(" ".join(mode)) 7010 if not self._match(TokenType.COMMA): 7011 break 7012 7013 return self.expression(exp.Transaction, this=this, modes=modes) 7014 7015 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7016 chain = None 7017 savepoint = None 7018 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7019 7020 self._match_texts(("TRANSACTION", "WORK")) 7021 7022 if self._match_text_seq("TO"): 7023 self._match_text_seq("SAVEPOINT") 7024 savepoint = self._parse_id_var() 
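        # SQL standard: COMMIT/ROLLBACK may be followed by AND [NO] CHAIN;
        # `chain` below ends up True unless the NO keyword is present.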
7025 7026 if self._match(TokenType.AND): 7027 chain = not self._match_text_seq("NO") 7028 self._match_text_seq("CHAIN") 7029 7030 if is_rollback: 7031 return self.expression(exp.Rollback, savepoint=savepoint) 7032 7033 return self.expression(exp.Commit, chain=chain) 7034 7035 def _parse_refresh(self) -> exp.Refresh: 7036 self._match(TokenType.TABLE) 7037 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7038 7039 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7040 if not self._match_text_seq("ADD"): 7041 return None 7042 7043 self._match(TokenType.COLUMN) 7044 exists_column = self._parse_exists(not_=True) 7045 expression = self._parse_field_def() 7046 7047 if expression: 7048 expression.set("exists", exists_column) 7049 7050 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7051 if self._match_texts(("FIRST", "AFTER")): 7052 position = self._prev.text 7053 column_position = self.expression( 7054 exp.ColumnPosition, this=self._parse_column(), position=position 7055 ) 7056 expression.set("position", column_position) 7057 7058 return expression 7059 7060 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7061 drop = self._match(TokenType.DROP) and self._parse_drop() 7062 if drop and not isinstance(drop, exp.Command): 7063 drop.set("kind", drop.args.get("kind", "COLUMN")) 7064 return drop 7065 7066 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7067 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7068 return self.expression( 7069 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7070 ) 7071 7072 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7073 index = self._index - 1 7074 7075 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7076 return self._parse_csv( 7077 lambda: self.expression( 7078 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7079 ) 7080 ) 7081 7082 self._retreat(index) 7083 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7084 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7085 7086 if self._match_text_seq("ADD", "COLUMNS"): 7087 schema = self._parse_schema() 7088 if schema: 7089 return [schema] 7090 return [] 7091 7092 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7093 7094 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7095 if self._match_texts(self.ALTER_ALTER_PARSERS): 7096 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7097 7098 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7099 # keyword after ALTER we default to parsing this statement 7100 self._match(TokenType.COLUMN) 7101 column = self._parse_field(any_token=True) 7102 7103 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7104 return self.expression(exp.AlterColumn, this=column, drop=True) 7105 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7106 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7107 if self._match(TokenType.COMMENT): 7108 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7109 if self._match_text_seq("DROP", "NOT", "NULL"): 7110 return self.expression( 7111 exp.AlterColumn, 7112 this=column, 7113 drop=True, 7114 allow_null=True, 7115 ) 7116 if self._match_text_seq("SET", "NOT", "NULL"): 7117 return self.expression( 7118 
exp.AlterColumn, 7119 this=column, 7120 allow_null=False, 7121 ) 7122 7123 if self._match_text_seq("SET", "VISIBLE"): 7124 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7125 if self._match_text_seq("SET", "INVISIBLE"): 7126 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7127 7128 self._match_text_seq("SET", "DATA") 7129 self._match_text_seq("TYPE") 7130 return self.expression( 7131 exp.AlterColumn, 7132 this=column, 7133 dtype=self._parse_types(), 7134 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7135 using=self._match(TokenType.USING) and self._parse_assignment(), 7136 ) 7137 7138 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7139 if self._match_texts(("ALL", "EVEN", "AUTO")): 7140 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7141 7142 self._match_text_seq("KEY", "DISTKEY") 7143 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7144 7145 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7146 if compound: 7147 self._match_text_seq("SORTKEY") 7148 7149 if self._match(TokenType.L_PAREN, advance=False): 7150 return self.expression( 7151 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7152 ) 7153 7154 self._match_texts(("AUTO", "NONE")) 7155 return self.expression( 7156 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7157 ) 7158 7159 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7160 index = self._index - 1 7161 7162 partition_exists = self._parse_exists() 7163 if self._match(TokenType.PARTITION, advance=False): 7164 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7165 7166 self._retreat(index) 7167 return self._parse_csv(self._parse_drop_column) 7168 7169 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7170 if self._match(TokenType.COLUMN): 7171 exists = self._parse_exists() 7172 old_column = self._parse_column() 7173 to = self._match_text_seq("TO") 7174 new_column = self._parse_column() 7175 7176 if old_column is None or to is None or new_column is None: 7177 return None 7178 7179 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7180 7181 self._match_text_seq("TO") 7182 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7183 7184 def _parse_alter_table_set(self) -> exp.AlterSet: 7185 alter_set = self.expression(exp.AlterSet) 7186 7187 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7188 "TABLE", "PROPERTIES" 7189 ): 7190 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7191 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7192 alter_set.set("expressions", [self._parse_assignment()]) 7193 elif self._match_texts(("LOGGED", "UNLOGGED")): 7194 alter_set.set("option", exp.var(self._prev.text.upper())) 7195 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7196 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7197 elif self._match_text_seq("LOCATION"): 7198 alter_set.set("location", self._parse_field()) 7199 elif self._match_text_seq("ACCESS", "METHOD"): 7200 alter_set.set("access_method", self._parse_field()) 7201 elif self._match_text_seq("TABLESPACE"): 7202 alter_set.set("tablespace", self._parse_field()) 7203 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7204 
alter_set.set("file_format", [self._parse_field()]) 7205 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7206 alter_set.set("file_format", self._parse_wrapped_options()) 7207 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7208 alter_set.set("copy_options", self._parse_wrapped_options()) 7209 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7210 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7211 else: 7212 if self._match_text_seq("SERDE"): 7213 alter_set.set("serde", self._parse_field()) 7214 7215 alter_set.set("expressions", [self._parse_properties()]) 7216 7217 return alter_set 7218 7219 def _parse_alter(self) -> exp.Alter | exp.Command: 7220 start = self._prev 7221 7222 alter_token = self._match_set(self.ALTERABLES) and self._prev 7223 if not alter_token: 7224 return self._parse_as_command(start) 7225 7226 exists = self._parse_exists() 7227 only = self._match_text_seq("ONLY") 7228 this = self._parse_table(schema=True) 7229 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7230 7231 if self._next: 7232 self._advance() 7233 7234 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7235 if parser: 7236 actions = ensure_list(parser(self)) 7237 not_valid = self._match_text_seq("NOT", "VALID") 7238 options = self._parse_csv(self._parse_property) 7239 7240 if not self._curr and actions: 7241 return self.expression( 7242 exp.Alter, 7243 this=this, 7244 kind=alter_token.text.upper(), 7245 exists=exists, 7246 actions=actions, 7247 only=only, 7248 options=options, 7249 cluster=cluster, 7250 not_valid=not_valid, 7251 ) 7252 7253 return self._parse_as_command(start) 7254 7255 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7256 start = self._prev 7257 # https://duckdb.org/docs/sql/statements/analyze 7258 if not self._curr: 7259 return self.expression(exp.Analyze) 7260 7261 options = [] 7262 while self._match_texts(self.ANALYZE_STYLES): 7263 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7264 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7265 else: 7266 options.append(self._prev.text.upper()) 7267 7268 this: t.Optional[exp.Expression] = None 7269 inner_expression: t.Optional[exp.Expression] = None 7270 7271 kind = self._curr and self._curr.text.upper() 7272 7273 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7274 this = self._parse_table_parts() 7275 elif self._match_text_seq("TABLES"): 7276 if self._match_set((TokenType.FROM, TokenType.IN)): 7277 kind = f"{kind} {self._prev.text.upper()}" 7278 this = self._parse_table(schema=True, is_db_reference=True) 7279 elif self._match_text_seq("DATABASE"): 7280 this = self._parse_table(schema=True, is_db_reference=True) 7281 elif self._match_text_seq("CLUSTER"): 7282 this = self._parse_table() 7283 # Try matching inner expr keywords before fallback to parse table. 
7284 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7285 kind = None 7286 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7287 else: 7288 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7289 kind = None 7290 this = self._parse_table_parts() 7291 7292 partition = self._try_parse(self._parse_partition) 7293 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7294 return self._parse_as_command(start) 7295 7296 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7297 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7298 "WITH", "ASYNC", "MODE" 7299 ): 7300 mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE" 7301 else: 7302 mode = None 7303 7304 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7305 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7306 7307 properties = self._parse_properties() 7308 return self.expression( 7309 exp.Analyze, 7310 kind=kind, 7311 this=this, 7312 mode=mode, 7313 partition=partition, 7314 properties=properties, 7315 expression=inner_expression, 7316 options=options, 7317 ) 7318 7319 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7320 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7321 this = None 7322 kind = self._prev.text.upper() 7323 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7324 expressions = [] 7325 7326 if not self._match_text_seq("STATISTICS"): 7327 self.raise_error("Expecting token STATISTICS") 7328 7329 if self._match_text_seq("NOSCAN"): 7330 this = "NOSCAN" 7331 elif self._match(TokenType.FOR): 7332 if self._match_text_seq("ALL", "COLUMNS"): 7333 this = "FOR ALL COLUMNS" 7334 if self._match_texts("COLUMNS"): 7335 this = "FOR COLUMNS" 7336 expressions = self._parse_csv(self._parse_column_reference) 7337 elif self._match_text_seq("SAMPLE"): 7338 sample = self._parse_number() 7339 expressions = [ 7340 self.expression( 7341 exp.AnalyzeSample, 7342 sample=sample, 7343 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7344 ) 7345 ] 7346 7347 return self.expression( 7348 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7349 ) 7350 7351 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7352 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7353 kind = None 7354 this = None 7355 expression: t.Optional[exp.Expression] = None 7356 if self._match_text_seq("REF", "UPDATE"): 7357 kind = "REF" 7358 this = "UPDATE" 7359 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7360 this = "UPDATE SET DANGLING TO NULL" 7361 elif self._match_text_seq("STRUCTURE"): 7362 kind = "STRUCTURE" 7363 if self._match_text_seq("CASCADE", "FAST"): 7364 this = "CASCADE FAST" 7365 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7366 ("ONLINE", "OFFLINE") 7367 ): 7368 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7369 expression = self._parse_into() 7370 7371 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7372 7373 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7374 this = self._prev.text.upper() 7375 if self._match_text_seq("COLUMNS"): 7376 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7377 return None 7378 7379 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7380 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7381 if self._match_text_seq("STATISTICS"): 7382 return self.expression(exp.AnalyzeDelete, kind=kind) 7383 return None 7384 7385 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7386 if self._match_text_seq("CHAINED", "ROWS"): 7387 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7388 return None 7389 7390 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7391 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7392 this = self._prev.text.upper() 7393 expression: t.Optional[exp.Expression] = None 7394 expressions = [] 7395 update_options = None 7396 7397 if self._match_text_seq("HISTOGRAM", "ON"): 7398 expressions = self._parse_csv(self._parse_column_reference) 7399 with_expressions = [] 7400 while self._match(TokenType.WITH): 7401 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7402 if self._match_texts(("SYNC", "ASYNC")): 7403 if self._match_text_seq("MODE", advance=False): 7404 with_expressions.append(f"{self._prev.text.upper()} MODE") 7405 self._advance() 7406 else: 7407 buckets = self._parse_number() 7408 if self._match_text_seq("BUCKETS"): 7409 with_expressions.append(f"{buckets} BUCKETS") 7410 if with_expressions: 7411 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7412 7413 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7414 TokenType.UPDATE, advance=False 7415 ): 7416 update_options = self._prev.text.upper() 7417 self._advance() 7418 elif self._match_text_seq("USING", "DATA"): 7419 expression = self.expression(exp.UsingData, this=self._parse_string()) 7420 7421 return self.expression( 7422 exp.AnalyzeHistogram, 7423 this=this, 7424 expressions=expressions, 7425 expression=expression, 7426 update_options=update_options, 7427 ) 7428 7429 def _parse_merge(self) -> exp.Merge: 7430 self._match(TokenType.INTO) 7431 target = self._parse_table() 7432 7433 if target and self._match(TokenType.ALIAS, advance=False): 7434 target.set("alias", self._parse_table_alias()) 7435 7436 self._match(TokenType.USING) 7437 using = self._parse_table() 7438 7439 self._match(TokenType.ON) 7440 on = self._parse_assignment() 7441 7442 return self.expression( 7443 exp.Merge, 7444 this=target, 7445 using=using, 7446 on=on, 7447 whens=self._parse_when_matched(), 7448 returning=self._parse_returning(), 7449 ) 7450 7451 def _parse_when_matched(self) -> exp.Whens: 7452 whens = [] 7453 7454 while self._match(TokenType.WHEN): 7455 matched = not self._match(TokenType.NOT) 7456 self._match_text_seq("MATCHED") 7457 source = ( 7458 False 7459 if self._match_text_seq("BY", "TARGET") 7460 else self._match_text_seq("BY", "SOURCE") 7461 ) 7462 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7463 7464 self._match(TokenType.THEN) 7465 7466 if self._match(TokenType.INSERT): 7467 this = self._parse_star() 7468 if this: 7469 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7470 else: 7471 then = self.expression( 7472 exp.Insert, 7473 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7474 expression=self._match_text_seq("VALUES") and self._parse_value(), 7475 ) 7476 elif self._match(TokenType.UPDATE): 7477 expressions = self._parse_star() 7478 if expressions: 7479 then = self.expression(exp.Update, expressions=expressions) 7480 else: 7481 then = self.expression( 7482 exp.Update, 7483 expressions=self._match(TokenType.SET) 7484 and 
self._parse_csv(self._parse_equality), 7485 ) 7486 elif self._match(TokenType.DELETE): 7487 then = self.expression(exp.Var, this=self._prev.text) 7488 else: 7489 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7490 7491 whens.append( 7492 self.expression( 7493 exp.When, 7494 matched=matched, 7495 source=source, 7496 condition=condition, 7497 then=then, 7498 ) 7499 ) 7500 return self.expression(exp.Whens, expressions=whens) 7501 7502 def _parse_show(self) -> t.Optional[exp.Expression]: 7503 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7504 if parser: 7505 return parser(self) 7506 return self._parse_as_command(self._prev) 7507 7508 def _parse_set_item_assignment( 7509 self, kind: t.Optional[str] = None 7510 ) -> t.Optional[exp.Expression]: 7511 index = self._index 7512 7513 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7514 return self._parse_set_transaction(global_=kind == "GLOBAL") 7515 7516 left = self._parse_primary() or self._parse_column() 7517 assignment_delimiter = self._match_texts(("=", "TO")) 7518 7519 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7520 self._retreat(index) 7521 return None 7522 7523 right = self._parse_statement() or self._parse_id_var() 7524 if isinstance(right, (exp.Column, exp.Identifier)): 7525 right = exp.var(right.name) 7526 7527 this = self.expression(exp.EQ, this=left, expression=right) 7528 return self.expression(exp.SetItem, this=this, kind=kind) 7529 7530 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7531 self._match_text_seq("TRANSACTION") 7532 characteristics = self._parse_csv( 7533 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7534 ) 7535 return self.expression( 7536 exp.SetItem, 7537 expressions=characteristics, 7538 kind="TRANSACTION", 7539 **{"global": global_}, # type: ignore 7540 ) 7541 7542 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7543 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7544 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7545 7546 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7547 index = self._index 7548 set_ = self.expression( 7549 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7550 ) 7551 7552 if self._curr: 7553 self._retreat(index) 7554 return self._parse_as_command(self._prev) 7555 7556 return set_ 7557 7558 def _parse_var_from_options( 7559 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7560 ) -> t.Optional[exp.Var]: 7561 start = self._curr 7562 if not start: 7563 return None 7564 7565 option = start.text.upper() 7566 continuations = options.get(option) 7567 7568 index = self._index 7569 self._advance() 7570 for keywords in continuations or []: 7571 if isinstance(keywords, str): 7572 keywords = (keywords,) 7573 7574 if self._match_text_seq(*keywords): 7575 option = f"{option} {' '.join(keywords)}" 7576 break 7577 else: 7578 if continuations or continuations is None: 7579 if raise_unmatched: 7580 self.raise_error(f"Unknown option {option}") 7581 7582 self._retreat(index) 7583 return None 7584 7585 return exp.var(option) 7586 7587 def _parse_as_command(self, start: Token) -> exp.Command: 7588 while self._curr: 7589 self._advance() 7590 text = self._find_sql(start, self._prev) 7591 size = len(start.text) 7592 self._warn_unsupported() 7593 return exp.Command(this=text[:size], expression=text[size:]) 7594 7595 def _parse_dict_property(self, 
this: str) -> exp.DictProperty: 7596 settings = [] 7597 7598 self._match_l_paren() 7599 kind = self._parse_id_var() 7600 7601 if self._match(TokenType.L_PAREN): 7602 while True: 7603 key = self._parse_id_var() 7604 value = self._parse_primary() 7605 if not key and value is None: 7606 break 7607 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7608 self._match(TokenType.R_PAREN) 7609 7610 self._match_r_paren() 7611 7612 return self.expression( 7613 exp.DictProperty, 7614 this=this, 7615 kind=kind.this if kind else None, 7616 settings=settings, 7617 ) 7618 7619 def _parse_dict_range(self, this: str) -> exp.DictRange: 7620 self._match_l_paren() 7621 has_min = self._match_text_seq("MIN") 7622 if has_min: 7623 min = self._parse_var() or self._parse_primary() 7624 self._match_text_seq("MAX") 7625 max = self._parse_var() or self._parse_primary() 7626 else: 7627 max = self._parse_var() or self._parse_primary() 7628 min = exp.Literal.number(0) 7629 self._match_r_paren() 7630 return self.expression(exp.DictRange, this=this, min=min, max=max) 7631 7632 def _parse_comprehension( 7633 self, this: t.Optional[exp.Expression] 7634 ) -> t.Optional[exp.Comprehension]: 7635 index = self._index 7636 expression = self._parse_column() 7637 if not self._match(TokenType.IN): 7638 self._retreat(index - 1) 7639 return None 7640 iterator = self._parse_column() 7641 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7642 return self.expression( 7643 exp.Comprehension, 7644 this=this, 7645 expression=expression, 7646 iterator=iterator, 7647 condition=condition, 7648 ) 7649 7650 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7651 if self._match(TokenType.HEREDOC_STRING): 7652 return self.expression(exp.Heredoc, this=self._prev.text) 7653 7654 if not self._match_text_seq("$"): 7655 return None 7656 7657 tags = ["$"] 7658 tag_text = None 7659 7660 if self._is_connected(): 7661 self._advance() 7662 tags.append(self._prev.text.upper()) 7663 else: 7664 self.raise_error("No closing $ found") 7665 7666 if tags[-1] != "$": 7667 if self._is_connected() and self._match_text_seq("$"): 7668 tag_text = tags[-1] 7669 tags.append("$") 7670 else: 7671 self.raise_error("No closing $ found") 7672 7673 heredoc_start = self._curr 7674 7675 while self._curr: 7676 if self._match_text_seq(*tags, advance=False): 7677 this = self._find_sql(heredoc_start, self._prev) 7678 self._advance(len(tags)) 7679 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7680 7681 self._advance() 7682 7683 self.raise_error(f"No closing {''.join(tags)} found") 7684 return None 7685 7686 def _find_parser( 7687 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7688 ) -> t.Optional[t.Callable]: 7689 if not self._curr: 7690 return None 7691 7692 index = self._index 7693 this = [] 7694 while True: 7695 # The current token might be multiple words 7696 curr = self._curr.text.upper() 7697 key = curr.split(" ") 7698 this.append(curr) 7699 7700 self._advance() 7701 result, trie = in_trie(trie, key) 7702 if result == TrieResult.FAILED: 7703 break 7704 7705 if result == TrieResult.EXISTS: 7706 subparser = parsers[" ".join(this)] 7707 return subparser 7708 7709 self._retreat(index) 7710 return None 7711 7712 def _match(self, token_type, advance=True, expression=None): 7713 if not self._curr: 7714 return None 7715 7716 if self._curr.token_type == token_type: 7717 if advance: 7718 self._advance() 7719 self._add_comments(expression) 7720 return True 7721 7722 return None 7723 7724 def _match_set(self, types, 
advance=True): 7725 if not self._curr: 7726 return None 7727 7728 if self._curr.token_type in types: 7729 if advance: 7730 self._advance() 7731 return True 7732 7733 return None 7734 7735 def _match_pair(self, token_type_a, token_type_b, advance=True): 7736 if not self._curr or not self._next: 7737 return None 7738 7739 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7740 if advance: 7741 self._advance(2) 7742 return True 7743 7744 return None 7745 7746 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7747 if not self._match(TokenType.L_PAREN, expression=expression): 7748 self.raise_error("Expecting (") 7749 7750 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7751 if not self._match(TokenType.R_PAREN, expression=expression): 7752 self.raise_error("Expecting )") 7753 7754 def _match_texts(self, texts, advance=True): 7755 if ( 7756 self._curr 7757 and self._curr.token_type != TokenType.STRING 7758 and self._curr.text.upper() in texts 7759 ): 7760 if advance: 7761 self._advance() 7762 return True 7763 return None 7764 7765 def _match_text_seq(self, *texts, advance=True): 7766 index = self._index 7767 for text in texts: 7768 if ( 7769 self._curr 7770 and self._curr.token_type != TokenType.STRING 7771 and self._curr.text.upper() == text 7772 ): 7773 self._advance() 7774 else: 7775 self._retreat(index) 7776 return None 7777 7778 if not advance: 7779 self._retreat(index) 7780 7781 return True 7782 7783 def _replace_lambda( 7784 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7785 ) -> t.Optional[exp.Expression]: 7786 if not node: 7787 return node 7788 7789 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7790 7791 for column in node.find_all(exp.Column): 7792 typ = lambda_types.get(column.parts[0].name) 7793 if typ is not None: 7794 dot_or_id = column.to_dot() if column.table else column.this 7795 7796 if typ: 7797 dot_or_id = self.expression( 7798 exp.Cast, 7799 this=dot_or_id, 7800 to=typ, 7801 ) 7802 7803 parent = column.parent 7804 7805 while isinstance(parent, exp.Dot): 7806 if not isinstance(parent.parent, exp.Dot): 7807 parent.replace(dot_or_id) 7808 break 7809 parent = parent.parent 7810 else: 7811 if column is node: 7812 node = dot_or_id 7813 else: 7814 column.replace(dot_or_id) 7815 return node 7816 7817 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7818 start = self._prev 7819 7820 # Not to be confused with TRUNCATE(number, decimals) function call 7821 if self._match(TokenType.L_PAREN): 7822 self._retreat(self._index - 2) 7823 return self._parse_function() 7824 7825 # Clickhouse supports TRUNCATE DATABASE as well 7826 is_database = self._match(TokenType.DATABASE) 7827 7828 self._match(TokenType.TABLE) 7829 7830 exists = self._parse_exists(not_=False) 7831 7832 expressions = self._parse_csv( 7833 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7834 ) 7835 7836 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7837 7838 if self._match_text_seq("RESTART", "IDENTITY"): 7839 identity = "RESTART" 7840 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7841 identity = "CONTINUE" 7842 else: 7843 identity = None 7844 7845 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7846 option = self._prev.text 7847 else: 7848 option = None 7849 7850 partition = self._parse_partition() 7851 7852 # Fallback case 7853 if self._curr: 7854 return 
self._parse_as_command(start) 7855 7856 return self.expression( 7857 exp.TruncateTable, 7858 expressions=expressions, 7859 is_database=is_database, 7860 exists=exists, 7861 cluster=cluster, 7862 identity=identity, 7863 option=option, 7864 partition=partition, 7865 ) 7866 7867 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7868 this = self._parse_ordered(self._parse_opclass) 7869 7870 if not self._match(TokenType.WITH): 7871 return this 7872 7873 op = self._parse_var(any_token=True) 7874 7875 return self.expression(exp.WithOperator, this=this, op=op) 7876 7877 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7878 self._match(TokenType.EQ) 7879 self._match(TokenType.L_PAREN) 7880 7881 opts: t.List[t.Optional[exp.Expression]] = [] 7882 while self._curr and not self._match(TokenType.R_PAREN): 7883 if self._match_text_seq("FORMAT_NAME", "="): 7884 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7885 # so we parse it separately to use _parse_field() 7886 prop = self.expression( 7887 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7888 ) 7889 opts.append(prop) 7890 else: 7891 opts.append(self._parse_property()) 7892 7893 self._match(TokenType.COMMA) 7894 7895 return opts 7896 7897 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7898 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7899 7900 options = [] 7901 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7902 option = self._parse_var(any_token=True) 7903 prev = self._prev.text.upper() 7904 7905 # Different dialects might separate options and values by white space, "=" and "AS" 7906 self._match(TokenType.EQ) 7907 self._match(TokenType.ALIAS) 7908 7909 param = self.expression(exp.CopyParameter, this=option) 7910 7911 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7912 TokenType.L_PAREN, advance=False 7913 ): 7914 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7915 param.set("expressions", self._parse_wrapped_options()) 7916 elif prev == "FILE_FORMAT": 7917 # T-SQL's external file format case 7918 param.set("expression", self._parse_field()) 7919 else: 7920 param.set("expression", self._parse_unquoted_field()) 7921 7922 options.append(param) 7923 self._match(sep) 7924 7925 return options 7926 7927 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7928 expr = self.expression(exp.Credentials) 7929 7930 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7931 expr.set("storage", self._parse_field()) 7932 if self._match_text_seq("CREDENTIALS"): 7933 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7934 creds = ( 7935 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7936 ) 7937 expr.set("credentials", creds) 7938 if self._match_text_seq("ENCRYPTION"): 7939 expr.set("encryption", self._parse_wrapped_options()) 7940 if self._match_text_seq("IAM_ROLE"): 7941 expr.set("iam_role", self._parse_field()) 7942 if self._match_text_seq("REGION"): 7943 expr.set("region", self._parse_field()) 7944 7945 return expr 7946 7947 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7948 return self._parse_field() 7949 7950 def _parse_copy(self) -> exp.Copy | exp.Command: 7951 start = self._prev 7952 7953 self._match(TokenType.INTO) 7954 7955 this = ( 7956 self._parse_select(nested=True, parse_subquery_alias=False) 7957 if self._match(TokenType.L_PAREN, advance=False) 7958 else self._parse_table(schema=True) 7959 ) 7960 7961 
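        # COPY direction: `kind` is True for COPY ... FROM (load) and False for
        # COPY ... TO (unload); FROM is the default when neither keyword matches.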
kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7962 7963 files = self._parse_csv(self._parse_file_location) 7964 credentials = self._parse_credentials() 7965 7966 self._match_text_seq("WITH") 7967 7968 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7969 7970 # Fallback case 7971 if self._curr: 7972 return self._parse_as_command(start) 7973 7974 return self.expression( 7975 exp.Copy, 7976 this=this, 7977 kind=kind, 7978 credentials=credentials, 7979 files=files, 7980 params=params, 7981 ) 7982 7983 def _parse_normalize(self) -> exp.Normalize: 7984 return self.expression( 7985 exp.Normalize, 7986 this=self._parse_bitwise(), 7987 form=self._match(TokenType.COMMA) and self._parse_var(), 7988 ) 7989 7990 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 7991 args = self._parse_csv(lambda: self._parse_lambda()) 7992 7993 this = seq_get(args, 0) 7994 decimals = seq_get(args, 1) 7995 7996 return expr_type( 7997 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 7998 ) 7999 8000 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8001 if self._match_text_seq("COLUMNS", "(", advance=False): 8002 this = self._parse_function() 8003 if isinstance(this, exp.Columns): 8004 this.set("unpack", True) 8005 return this 8006 8007 return self.expression( 8008 exp.Star, 8009 **{ # type: ignore 8010 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8011 "replace": self._parse_star_op("REPLACE"), 8012 "rename": self._parse_star_op("RENAME"), 8013 }, 8014 ) 8015 8016 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8017 privilege_parts = [] 8018 8019 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8020 # (end of privilege list) or L_PAREN (start of column list) are met 8021 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8022 privilege_parts.append(self._curr.text.upper()) 8023 self._advance() 8024 8025 this = exp.var(" ".join(privilege_parts)) 8026 expressions = ( 8027 self._parse_wrapped_csv(self._parse_column) 8028 if self._match(TokenType.L_PAREN, advance=False) 8029 else None 8030 ) 8031 8032 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8033 8034 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8035 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8036 principal = self._parse_id_var() 8037 8038 if not principal: 8039 return None 8040 8041 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8042 8043 def _parse_grant(self) -> exp.Grant | exp.Command: 8044 start = self._prev 8045 8046 privileges = self._parse_csv(self._parse_grant_privilege) 8047 8048 self._match(TokenType.ON) 8049 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8050 8051 # Attempt to parse the securable e.g. 
MySQL allows names 8052 # such as "foo.*", "*.*" which are not easily parseable yet 8053 securable = self._try_parse(self._parse_table_parts) 8054 8055 if not securable or not self._match_text_seq("TO"): 8056 return self._parse_as_command(start) 8057 8058 principals = self._parse_csv(self._parse_grant_principal) 8059 8060 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8061 8062 if self._curr: 8063 return self._parse_as_command(start) 8064 8065 return self.expression( 8066 exp.Grant, 8067 privileges=privileges, 8068 kind=kind, 8069 securable=securable, 8070 principals=principals, 8071 grant_option=grant_option, 8072 ) 8073 8074 def _parse_overlay(self) -> exp.Overlay: 8075 return self.expression( 8076 exp.Overlay, 8077 **{ # type: ignore 8078 "this": self._parse_bitwise(), 8079 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8080 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8081 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8082 }, 8083 )
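The statement and clause helpers above are internal, but they are exercised end to end by the public parsing API. A minimal usage sketch, assuming only the documented sqlglot entry points:

import sqlglot

# parse_one tokenizes the string and runs Parser over the tokens; the OVER
# clause below is handled by _parse_window and the alias by _parse_alias.
ast = sqlglot.parse_one("SELECT SUM(x) OVER (PARTITION BY y ORDER BY z) AS s FROM t")
print(type(ast).__name__)  # Select
print(ast.sql())           # regenerates the query from the AST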
102def build_mod(args: t.List) -> exp.Mod: 103 this = seq_get(args, 0) 104 expression = seq_get(args, 1) 105 106 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 107 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 108 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 109 110 return exp.Mod(this=this, expression=expression)
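The exp.Paren wrapping above preserves grouping once Mod is rendered with the % operator; without it, MOD(a + 1, 7) would regenerate as a + 1 % 7. A small sketch of the observable effect:

from sqlglot import parse_one

# MOD(...) is routed through build_mod (see FUNCTIONS below), so binary
# operands come back parenthesized in the generated SQL.
print(parse_one("SELECT MOD(a + 1, 7)").sql())  # SELECT (a + 1) % 7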
122def build_array_constructor( 123 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 124) -> exp.Expression: 125 array_exp = exp_class(expressions=args) 126 127 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 128 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 129 130 return array_exp
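Whether a dialect distinguishes ARRAY[1, 2] from plain [1, 2] is governed by its HAS_DISTINCT_ARRAY_CONSTRUCTORS flag; the builder only records which bracket introduced the literal. A sketch (the choice of DuckDB here is illustrative, not a claim about how that dialect sets the flag):

from sqlglot import exp
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType
from sqlglot.dialects.dialect import Dialect

# bracket_notation is only set when the dialect declares distinct
# constructors; otherwise args.get(...) returns None.
arr = build_array_constructor(
    exp.Array,
    [exp.Literal.number(1), exp.Literal.number(2)],
    TokenType.L_BRACKET,
    Dialect.get_or_raise("duckdb"),
)
print(arr.args.get("bracket_notation"))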
133def build_convert_timezone( 134 args: t.List, default_source_tz: t.Optional[str] = None 135) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 136 if len(args) == 2: 137 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 138 return exp.ConvertTimezone( 139 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 140 ) 141 142 return exp.ConvertTimezone.from_arg_list(args)
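With the two-argument form the source timezone is absent from the SQL, so callers may backfill it from a dialect default. A sketch (the "UTC" default is illustrative):

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

# Two arguments means (target_tz, timestamp); default_source_tz fills source_tz.
node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("created_at")],
    default_source_tz="UTC",
)
print(node.args["source_tz"].sql())  # 'UTC'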
175class Parser(metaclass=_Parser): 176 """ 177 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 178 179 Args: 180 error_level: The desired error level. 181 Default: ErrorLevel.IMMEDIATE 182 error_message_context: The amount of context to capture from a query string when displaying 183 the error message (in number of characters). 184 Default: 100 185 max_errors: Maximum number of error messages to include in a raised ParseError. 186 This is only relevant if error_level is ErrorLevel.RAISE. 187 Default: 3 188 """ 189 190 FUNCTIONS: t.Dict[str, t.Callable] = { 191 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 192 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 193 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 194 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 195 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 196 ), 197 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "CHAR": lambda args: exp.Chr(expressions=args), 201 "CHR": lambda args: exp.Chr(expressions=args), 202 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 203 "CONCAT": lambda args, dialect: exp.Concat( 204 expressions=args, 205 safe=not dialect.STRICT_STRING_CONCAT, 206 coalesce=dialect.CONCAT_COALESCE, 207 ), 208 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 209 expressions=args, 210 safe=not dialect.STRICT_STRING_CONCAT, 211 coalesce=dialect.CONCAT_COALESCE, 212 ), 213 "CONVERT_TIMEZONE": build_convert_timezone, 214 "DATE_TO_DATE_STR": lambda args: exp.Cast( 215 this=seq_get(args, 0), 216 to=exp.DataType(this=exp.DataType.Type.TEXT), 217 ), 218 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 219 start=seq_get(args, 0), 220 end=seq_get(args, 1), 221 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 222 ), 223 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 224 "HEX": build_hex, 225 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 226 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 227 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 228 "LIKE": build_like, 229 "LOG": build_logarithm, 230 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 231 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 232 "LOWER": build_lower, 233 "LPAD": lambda args: build_pad(args), 234 "LEFTPAD": lambda args: build_pad(args), 235 "LTRIM": lambda args: build_trim(args), 236 "MOD": build_mod, 237 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 238 "RPAD": lambda args: build_pad(args, is_left=False), 239 "RTRIM": lambda args: build_trim(args, is_left=False), 240 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 241 if len(args) != 2 242 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 243 "STRPOS": exp.StrPosition.from_arg_list, 244 "CHARINDEX": lambda args: build_locate_strposition(args), 245 "INSTR": exp.StrPosition.from_arg_list, 246 "LOCATE": lambda args: build_locate_strposition(args), 247 "TIME_TO_TIME_STR": lambda args: exp.Cast( 248 this=seq_get(args, 0), 249 to=exp.DataType(this=exp.DataType.Type.TEXT), 250 ), 251 "TO_HEX": build_hex, 252 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 253 this=exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 start=exp.Literal.number(1), 258 length=exp.Literal.number(10), 259 ), 260 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 261 "UPPER": build_upper, 262 "VAR_MAP": build_var_map, 263 } 264 265 NO_PAREN_FUNCTIONS = { 266 TokenType.CURRENT_DATE: exp.CurrentDate, 267 TokenType.CURRENT_DATETIME: exp.CurrentDate, 268 TokenType.CURRENT_TIME: exp.CurrentTime, 269 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 270 TokenType.CURRENT_USER: exp.CurrentUser, 271 } 272 273 STRUCT_TYPE_TOKENS = { 274 TokenType.NESTED, 275 TokenType.OBJECT, 276 TokenType.STRUCT, 277 TokenType.UNION, 278 } 279 280 NESTED_TYPE_TOKENS = { 281 TokenType.ARRAY, 282 TokenType.LIST, 283 TokenType.LOWCARDINALITY, 284 TokenType.MAP, 285 TokenType.NULLABLE, 286 TokenType.RANGE, 287 *STRUCT_TYPE_TOKENS, 288 } 289 290 ENUM_TYPE_TOKENS = { 291 TokenType.DYNAMIC, 292 TokenType.ENUM, 293 TokenType.ENUM8, 294 TokenType.ENUM16, 295 } 296 297 AGGREGATE_TYPE_TOKENS = { 298 TokenType.AGGREGATEFUNCTION, 299 TokenType.SIMPLEAGGREGATEFUNCTION, 300 } 301 302 TYPE_TOKENS = { 303 TokenType.BIT, 304 TokenType.BOOLEAN, 305 TokenType.TINYINT, 306 TokenType.UTINYINT, 307 TokenType.SMALLINT, 308 TokenType.USMALLINT, 309 TokenType.INT, 310 TokenType.UINT, 311 TokenType.BIGINT, 312 TokenType.UBIGINT, 313 TokenType.INT128, 314 TokenType.UINT128, 315 TokenType.INT256, 316 TokenType.UINT256, 317 TokenType.MEDIUMINT, 318 TokenType.UMEDIUMINT, 319 TokenType.FIXEDSTRING, 320 TokenType.FLOAT, 321 TokenType.DOUBLE, 322 TokenType.UDOUBLE, 323 TokenType.CHAR, 324 TokenType.NCHAR, 325 TokenType.VARCHAR, 326 TokenType.NVARCHAR, 327 TokenType.BPCHAR, 328 TokenType.TEXT, 329 TokenType.MEDIUMTEXT, 330 TokenType.LONGTEXT, 331 TokenType.MEDIUMBLOB, 332 TokenType.LONGBLOB, 333 TokenType.BINARY, 334 TokenType.VARBINARY, 335 TokenType.JSON, 336 TokenType.JSONB, 337 TokenType.INTERVAL, 338 TokenType.TINYBLOB, 339 TokenType.TINYTEXT, 340 TokenType.TIME, 341 TokenType.TIMETZ, 342 TokenType.TIMESTAMP, 343 TokenType.TIMESTAMP_S, 344 TokenType.TIMESTAMP_MS, 345 TokenType.TIMESTAMP_NS, 346 TokenType.TIMESTAMPTZ, 347 TokenType.TIMESTAMPLTZ, 348 TokenType.TIMESTAMPNTZ, 349 TokenType.DATETIME, 350 TokenType.DATETIME2, 351 TokenType.DATETIME64, 352 TokenType.SMALLDATETIME, 353 TokenType.DATE, 354 TokenType.DATE32, 355 TokenType.INT4RANGE, 356 TokenType.INT4MULTIRANGE, 357 TokenType.INT8RANGE, 358 TokenType.INT8MULTIRANGE, 359 TokenType.NUMRANGE, 360 TokenType.NUMMULTIRANGE, 361 TokenType.TSRANGE, 362 TokenType.TSMULTIRANGE, 363 TokenType.TSTZRANGE, 364 TokenType.TSTZMULTIRANGE, 365 TokenType.DATERANGE, 366 TokenType.DATEMULTIRANGE, 367 TokenType.DECIMAL, 368 TokenType.DECIMAL32, 369 TokenType.DECIMAL64, 370 TokenType.DECIMAL128, 371 TokenType.DECIMAL256, 372 TokenType.UDECIMAL, 373 TokenType.BIGDECIMAL, 374 TokenType.UUID, 375 TokenType.GEOGRAPHY, 376 TokenType.GEOMETRY, 377 TokenType.POINT, 378 TokenType.RING, 379 TokenType.LINESTRING, 380 TokenType.MULTILINESTRING, 381 TokenType.POLYGON, 382 TokenType.MULTIPOLYGON, 383 TokenType.HLLSKETCH, 384 TokenType.HSTORE, 385 TokenType.PSEUDO_TYPE, 386 TokenType.SUPER, 387 TokenType.SERIAL, 388 TokenType.SMALLSERIAL, 389 TokenType.BIGSERIAL, 390 TokenType.XML, 391 TokenType.YEAR, 392 TokenType.USERDEFINED, 393 TokenType.MONEY, 394 TokenType.SMALLMONEY, 395 TokenType.ROWVERSION, 396 TokenType.IMAGE, 397 TokenType.VARIANT, 398 TokenType.VECTOR, 399 TokenType.OBJECT, 400 
TokenType.OBJECT_IDENTIFIER, 401 TokenType.INET, 402 TokenType.IPADDRESS, 403 TokenType.IPPREFIX, 404 TokenType.IPV4, 405 TokenType.IPV6, 406 TokenType.UNKNOWN, 407 TokenType.NULL, 408 TokenType.NAME, 409 TokenType.TDIGEST, 410 TokenType.DYNAMIC, 411 *ENUM_TYPE_TOKENS, 412 *NESTED_TYPE_TOKENS, 413 *AGGREGATE_TYPE_TOKENS, 414 } 415 416 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 417 TokenType.BIGINT: TokenType.UBIGINT, 418 TokenType.INT: TokenType.UINT, 419 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 420 TokenType.SMALLINT: TokenType.USMALLINT, 421 TokenType.TINYINT: TokenType.UTINYINT, 422 TokenType.DECIMAL: TokenType.UDECIMAL, 423 TokenType.DOUBLE: TokenType.UDOUBLE, 424 } 425 426 SUBQUERY_PREDICATES = { 427 TokenType.ANY: exp.Any, 428 TokenType.ALL: exp.All, 429 TokenType.EXISTS: exp.Exists, 430 TokenType.SOME: exp.Any, 431 } 432 433 RESERVED_TOKENS = { 434 *Tokenizer.SINGLE_TOKENS.values(), 435 TokenType.SELECT, 436 } - {TokenType.IDENTIFIER} 437 438 DB_CREATABLES = { 439 TokenType.DATABASE, 440 TokenType.DICTIONARY, 441 TokenType.MODEL, 442 TokenType.NAMESPACE, 443 TokenType.SCHEMA, 444 TokenType.SEQUENCE, 445 TokenType.SINK, 446 TokenType.SOURCE, 447 TokenType.STORAGE_INTEGRATION, 448 TokenType.STREAMLIT, 449 TokenType.TABLE, 450 TokenType.TAG, 451 TokenType.VIEW, 452 TokenType.WAREHOUSE, 453 } 454 455 CREATABLES = { 456 TokenType.COLUMN, 457 TokenType.CONSTRAINT, 458 TokenType.FOREIGN_KEY, 459 TokenType.FUNCTION, 460 TokenType.INDEX, 461 TokenType.PROCEDURE, 462 *DB_CREATABLES, 463 } 464 465 ALTERABLES = { 466 TokenType.INDEX, 467 TokenType.TABLE, 468 TokenType.VIEW, 469 } 470 471 # Tokens that can represent identifiers 472 ID_VAR_TOKENS = { 473 TokenType.ALL, 474 TokenType.ATTACH, 475 TokenType.VAR, 476 TokenType.ANTI, 477 TokenType.APPLY, 478 TokenType.ASC, 479 TokenType.ASOF, 480 TokenType.AUTO_INCREMENT, 481 TokenType.BEGIN, 482 TokenType.BPCHAR, 483 TokenType.CACHE, 484 TokenType.CASE, 485 TokenType.COLLATE, 486 TokenType.COMMAND, 487 TokenType.COMMENT, 488 TokenType.COMMIT, 489 TokenType.CONSTRAINT, 490 TokenType.COPY, 491 TokenType.CUBE, 492 TokenType.CURRENT_SCHEMA, 493 TokenType.DEFAULT, 494 TokenType.DELETE, 495 TokenType.DESC, 496 TokenType.DESCRIBE, 497 TokenType.DETACH, 498 TokenType.DICTIONARY, 499 TokenType.DIV, 500 TokenType.END, 501 TokenType.EXECUTE, 502 TokenType.EXPORT, 503 TokenType.ESCAPE, 504 TokenType.FALSE, 505 TokenType.FIRST, 506 TokenType.FILTER, 507 TokenType.FINAL, 508 TokenType.FORMAT, 509 TokenType.FULL, 510 TokenType.IDENTIFIER, 511 TokenType.IS, 512 TokenType.ISNULL, 513 TokenType.INTERVAL, 514 TokenType.KEEP, 515 TokenType.KILL, 516 TokenType.LEFT, 517 TokenType.LIMIT, 518 TokenType.LOAD, 519 TokenType.MERGE, 520 TokenType.NATURAL, 521 TokenType.NEXT, 522 TokenType.OFFSET, 523 TokenType.OPERATOR, 524 TokenType.ORDINALITY, 525 TokenType.OVERLAPS, 526 TokenType.OVERWRITE, 527 TokenType.PARTITION, 528 TokenType.PERCENT, 529 TokenType.PIVOT, 530 TokenType.PRAGMA, 531 TokenType.RANGE, 532 TokenType.RECURSIVE, 533 TokenType.REFERENCES, 534 TokenType.REFRESH, 535 TokenType.RENAME, 536 TokenType.REPLACE, 537 TokenType.RIGHT, 538 TokenType.ROLLUP, 539 TokenType.ROW, 540 TokenType.ROWS, 541 TokenType.SEMI, 542 TokenType.SET, 543 TokenType.SETTINGS, 544 TokenType.SHOW, 545 TokenType.TEMPORARY, 546 TokenType.TOP, 547 TokenType.TRUE, 548 TokenType.TRUNCATE, 549 TokenType.UNIQUE, 550 TokenType.UNNEST, 551 TokenType.UNPIVOT, 552 TokenType.UPDATE, 553 TokenType.USE, 554 TokenType.VOLATILE, 555 TokenType.WINDOW, 556 *CREATABLES, 557 *SUBQUERY_PREDICATES, 558 *TYPE_TOKENS, 559 
*NO_PAREN_FUNCTIONS, 560 } 561 ID_VAR_TOKENS.remove(TokenType.UNION) 562 563 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 564 TokenType.ANTI, 565 TokenType.APPLY, 566 TokenType.ASOF, 567 TokenType.FULL, 568 TokenType.LEFT, 569 TokenType.LOCK, 570 TokenType.NATURAL, 571 TokenType.RIGHT, 572 TokenType.SEMI, 573 TokenType.WINDOW, 574 } 575 576 ALIAS_TOKENS = ID_VAR_TOKENS 577 578 ARRAY_CONSTRUCTORS = { 579 "ARRAY": exp.Array, 580 "LIST": exp.List, 581 } 582 583 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 584 585 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 586 587 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 588 589 FUNC_TOKENS = { 590 TokenType.COLLATE, 591 TokenType.COMMAND, 592 TokenType.CURRENT_DATE, 593 TokenType.CURRENT_DATETIME, 594 TokenType.CURRENT_SCHEMA, 595 TokenType.CURRENT_TIMESTAMP, 596 TokenType.CURRENT_TIME, 597 TokenType.CURRENT_USER, 598 TokenType.FILTER, 599 TokenType.FIRST, 600 TokenType.FORMAT, 601 TokenType.GLOB, 602 TokenType.IDENTIFIER, 603 TokenType.INDEX, 604 TokenType.ISNULL, 605 TokenType.ILIKE, 606 TokenType.INSERT, 607 TokenType.LIKE, 608 TokenType.MERGE, 609 TokenType.NEXT, 610 TokenType.OFFSET, 611 TokenType.PRIMARY_KEY, 612 TokenType.RANGE, 613 TokenType.REPLACE, 614 TokenType.RLIKE, 615 TokenType.ROW, 616 TokenType.UNNEST, 617 TokenType.VAR, 618 TokenType.LEFT, 619 TokenType.RIGHT, 620 TokenType.SEQUENCE, 621 TokenType.DATE, 622 TokenType.DATETIME, 623 TokenType.TABLE, 624 TokenType.TIMESTAMP, 625 TokenType.TIMESTAMPTZ, 626 TokenType.TRUNCATE, 627 TokenType.WINDOW, 628 TokenType.XOR, 629 *TYPE_TOKENS, 630 *SUBQUERY_PREDICATES, 631 } 632 633 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 634 TokenType.AND: exp.And, 635 } 636 637 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 638 TokenType.COLON_EQ: exp.PropertyEQ, 639 } 640 641 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 642 TokenType.OR: exp.Or, 643 } 644 645 EQUALITY = { 646 TokenType.EQ: exp.EQ, 647 TokenType.NEQ: exp.NEQ, 648 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 649 } 650 651 COMPARISON = { 652 TokenType.GT: exp.GT, 653 TokenType.GTE: exp.GTE, 654 TokenType.LT: exp.LT, 655 TokenType.LTE: exp.LTE, 656 } 657 658 BITWISE = { 659 TokenType.AMP: exp.BitwiseAnd, 660 TokenType.CARET: exp.BitwiseXor, 661 TokenType.PIPE: exp.BitwiseOr, 662 } 663 664 TERM = { 665 TokenType.DASH: exp.Sub, 666 TokenType.PLUS: exp.Add, 667 TokenType.MOD: exp.Mod, 668 TokenType.COLLATE: exp.Collate, 669 } 670 671 FACTOR = { 672 TokenType.DIV: exp.IntDiv, 673 TokenType.LR_ARROW: exp.Distance, 674 TokenType.SLASH: exp.Div, 675 TokenType.STAR: exp.Mul, 676 } 677 678 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 679 680 TIMES = { 681 TokenType.TIME, 682 TokenType.TIMETZ, 683 } 684 685 TIMESTAMPS = { 686 TokenType.TIMESTAMP, 687 TokenType.TIMESTAMPTZ, 688 TokenType.TIMESTAMPLTZ, 689 *TIMES, 690 } 691 692 SET_OPERATIONS = { 693 TokenType.UNION, 694 TokenType.INTERSECT, 695 TokenType.EXCEPT, 696 } 697 698 JOIN_METHODS = { 699 TokenType.ASOF, 700 TokenType.NATURAL, 701 TokenType.POSITIONAL, 702 } 703 704 JOIN_SIDES = { 705 TokenType.LEFT, 706 TokenType.RIGHT, 707 TokenType.FULL, 708 } 709 710 JOIN_KINDS = { 711 TokenType.ANTI, 712 TokenType.CROSS, 713 TokenType.INNER, 714 TokenType.OUTER, 715 TokenType.SEMI, 716 TokenType.STRAIGHT_JOIN, 717 } 718 719 JOIN_HINTS: t.Set[str] = set() 720 721 LAMBDAS = { 722 TokenType.ARROW: lambda self, expressions: self.expression( 723 exp.Lambda, 724 this=self._replace_lambda( 725 self._parse_assignment(), 726 expressions, 727 ), 
728 expressions=expressions, 729 ), 730 TokenType.FARROW: lambda self, expressions: self.expression( 731 exp.Kwarg, 732 this=exp.var(expressions[0].name), 733 expression=self._parse_assignment(), 734 ), 735 } 736 737 COLUMN_OPERATORS = { 738 TokenType.DOT: None, 739 TokenType.DOTCOLON: lambda self, this, to: self.expression( 740 exp.JSONCast, 741 this=this, 742 to=to, 743 ), 744 TokenType.DCOLON: lambda self, this, to: self.expression( 745 exp.Cast if self.STRICT_CAST else exp.TryCast, 746 this=this, 747 to=to, 748 ), 749 TokenType.ARROW: lambda self, this, path: self.expression( 750 exp.JSONExtract, 751 this=this, 752 expression=self.dialect.to_json_path(path), 753 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 754 ), 755 TokenType.DARROW: lambda self, this, path: self.expression( 756 exp.JSONExtractScalar, 757 this=this, 758 expression=self.dialect.to_json_path(path), 759 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 760 ), 761 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 762 exp.JSONBExtract, 763 this=this, 764 expression=path, 765 ), 766 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 767 exp.JSONBExtractScalar, 768 this=this, 769 expression=path, 770 ), 771 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 772 exp.JSONBContains, 773 this=this, 774 expression=key, 775 ), 776 } 777 778 EXPRESSION_PARSERS = { 779 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 780 exp.Column: lambda self: self._parse_column(), 781 exp.Condition: lambda self: self._parse_assignment(), 782 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 783 exp.Expression: lambda self: self._parse_expression(), 784 exp.From: lambda self: self._parse_from(joins=True), 785 exp.Group: lambda self: self._parse_group(), 786 exp.Having: lambda self: self._parse_having(), 787 exp.Hint: lambda self: self._parse_hint_body(), 788 exp.Identifier: lambda self: self._parse_id_var(), 789 exp.Join: lambda self: self._parse_join(), 790 exp.Lambda: lambda self: self._parse_lambda(), 791 exp.Lateral: lambda self: self._parse_lateral(), 792 exp.Limit: lambda self: self._parse_limit(), 793 exp.Offset: lambda self: self._parse_offset(), 794 exp.Order: lambda self: self._parse_order(), 795 exp.Ordered: lambda self: self._parse_ordered(), 796 exp.Properties: lambda self: self._parse_properties(), 797 exp.Qualify: lambda self: self._parse_qualify(), 798 exp.Returning: lambda self: self._parse_returning(), 799 exp.Select: lambda self: self._parse_select(), 800 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 801 exp.Table: lambda self: self._parse_table_parts(), 802 exp.TableAlias: lambda self: self._parse_table_alias(), 803 exp.Tuple: lambda self: self._parse_value(), 804 exp.Whens: lambda self: self._parse_when_matched(), 805 exp.Where: lambda self: self._parse_where(), 806 exp.Window: lambda self: self._parse_named_window(), 807 exp.With: lambda self: self._parse_with(), 808 "JOIN_TYPE": lambda self: self._parse_join_parts(), 809 } 810 811 STATEMENT_PARSERS = { 812 TokenType.ALTER: lambda self: self._parse_alter(), 813 TokenType.ANALYZE: lambda self: self._parse_analyze(), 814 TokenType.BEGIN: lambda self: self._parse_transaction(), 815 TokenType.CACHE: lambda self: self._parse_cache(), 816 TokenType.COMMENT: lambda self: self._parse_comment(), 817 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 818 TokenType.COPY: lambda self: self._parse_copy(), 819 TokenType.CREATE: lambda self: 
self._parse_create(), 820 TokenType.DELETE: lambda self: self._parse_delete(), 821 TokenType.DESC: lambda self: self._parse_describe(), 822 TokenType.DESCRIBE: lambda self: self._parse_describe(), 823 TokenType.DROP: lambda self: self._parse_drop(), 824 TokenType.GRANT: lambda self: self._parse_grant(), 825 TokenType.INSERT: lambda self: self._parse_insert(), 826 TokenType.KILL: lambda self: self._parse_kill(), 827 TokenType.LOAD: lambda self: self._parse_load(), 828 TokenType.MERGE: lambda self: self._parse_merge(), 829 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 830 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 831 TokenType.REFRESH: lambda self: self._parse_refresh(), 832 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 833 TokenType.SET: lambda self: self._parse_set(), 834 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 835 TokenType.UNCACHE: lambda self: self._parse_uncache(), 836 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 837 TokenType.UPDATE: lambda self: self._parse_update(), 838 TokenType.USE: lambda self: self._parse_use(), 839 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 840 } 841 842 UNARY_PARSERS = { 843 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 844 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 845 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 846 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 847 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 848 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 849 } 850 851 STRING_PARSERS = { 852 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 853 exp.RawString, this=token.text 854 ), 855 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 856 exp.National, this=token.text 857 ), 858 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 859 TokenType.STRING: lambda self, token: self.expression( 860 exp.Literal, this=token.text, is_string=True 861 ), 862 TokenType.UNICODE_STRING: lambda self, token: self.expression( 863 exp.UnicodeString, 864 this=token.text, 865 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 866 ), 867 } 868 869 NUMERIC_PARSERS = { 870 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 871 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 872 TokenType.HEX_STRING: lambda self, token: self.expression( 873 exp.HexString, 874 this=token.text, 875 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 876 ), 877 TokenType.NUMBER: lambda self, token: self.expression( 878 exp.Literal, this=token.text, is_string=False 879 ), 880 } 881 882 PRIMARY_PARSERS = { 883 **STRING_PARSERS, 884 **NUMERIC_PARSERS, 885 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 886 TokenType.NULL: lambda self, _: self.expression(exp.Null), 887 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 888 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 889 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 890 TokenType.STAR: lambda self, _: self._parse_star_ops(), 891 } 892 893 PLACEHOLDER_PARSERS = { 894 TokenType.PLACEHOLDER: 
lambda self: self.expression(exp.Placeholder), 895 TokenType.PARAMETER: lambda self: self._parse_parameter(), 896 TokenType.COLON: lambda self: ( 897 self.expression(exp.Placeholder, this=self._prev.text) 898 if self._match_set(self.ID_VAR_TOKENS) 899 else None 900 ), 901 } 902 903 RANGE_PARSERS = { 904 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 905 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 906 TokenType.GLOB: binary_range_parser(exp.Glob), 907 TokenType.ILIKE: binary_range_parser(exp.ILike), 908 TokenType.IN: lambda self, this: self._parse_in(this), 909 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 910 TokenType.IS: lambda self, this: self._parse_is(this), 911 TokenType.LIKE: binary_range_parser(exp.Like), 912 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 913 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 914 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 915 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 916 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 917 } 918 919 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 920 "ALLOWED_VALUES": lambda self: self.expression( 921 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 922 ), 923 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 924 "AUTO": lambda self: self._parse_auto_property(), 925 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 926 "BACKUP": lambda self: self.expression( 927 exp.BackupProperty, this=self._parse_var(any_token=True) 928 ), 929 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 930 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 931 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 932 "CHECKSUM": lambda self: self._parse_checksum(), 933 "CLUSTER BY": lambda self: self._parse_cluster(), 934 "CLUSTERED": lambda self: self._parse_clustered_by(), 935 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 936 exp.CollateProperty, **kwargs 937 ), 938 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 939 "CONTAINS": lambda self: self._parse_contains_property(), 940 "COPY": lambda self: self._parse_copy_property(), 941 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 942 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 943 "DEFINER": lambda self: self._parse_definer(), 944 "DETERMINISTIC": lambda self: self.expression( 945 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 946 ), 947 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 948 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 949 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 950 "DISTKEY": lambda self: self._parse_distkey(), 951 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 952 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 953 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 954 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 955 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 956 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 957 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 958 "FREESPACE": lambda self: self._parse_freespace(), 959 
"GLOBAL": lambda self: self.expression(exp.GlobalProperty), 960 "HEAP": lambda self: self.expression(exp.HeapProperty), 961 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 962 "IMMUTABLE": lambda self: self.expression( 963 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 964 ), 965 "INHERITS": lambda self: self.expression( 966 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 967 ), 968 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 969 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 970 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 971 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 972 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 973 "LIKE": lambda self: self._parse_create_like(), 974 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 975 "LOCK": lambda self: self._parse_locking(), 976 "LOCKING": lambda self: self._parse_locking(), 977 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 978 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 979 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 980 "MODIFIES": lambda self: self._parse_modifies_property(), 981 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 982 "NO": lambda self: self._parse_no_property(), 983 "ON": lambda self: self._parse_on_property(), 984 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 985 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 986 "PARTITION": lambda self: self._parse_partitioned_of(), 987 "PARTITION BY": lambda self: self._parse_partitioned_by(), 988 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 989 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 990 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 991 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 992 "READS": lambda self: self._parse_reads_property(), 993 "REMOTE": lambda self: self._parse_remote_with_connection(), 994 "RETURNS": lambda self: self._parse_returns(), 995 "STRICT": lambda self: self.expression(exp.StrictProperty), 996 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 997 "ROW": lambda self: self._parse_row(), 998 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 999 "SAMPLE": lambda self: self.expression( 1000 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1001 ), 1002 "SECURE": lambda self: self.expression(exp.SecureProperty), 1003 "SECURITY": lambda self: self._parse_security(), 1004 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1005 "SETTINGS": lambda self: self._parse_settings_property(), 1006 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1007 "SORTKEY": lambda self: self._parse_sortkey(), 1008 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1009 "STABLE": lambda self: self.expression( 1010 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1011 ), 1012 "STORED": lambda self: self._parse_stored(), 1013 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1014 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1015 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1016 "TEMPORARY": lambda self: 
self.expression(exp.TemporaryProperty), 1017 "TO": lambda self: self._parse_to_table(), 1018 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1019 "TRANSFORM": lambda self: self.expression( 1020 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1021 ), 1022 "TTL": lambda self: self._parse_ttl(), 1023 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1024 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1025 "VOLATILE": lambda self: self._parse_volatile_property(), 1026 "WITH": lambda self: self._parse_with_property(), 1027 } 1028 1029 CONSTRAINT_PARSERS = { 1030 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1031 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1032 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1033 "CHARACTER SET": lambda self: self.expression( 1034 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1035 ), 1036 "CHECK": lambda self: self.expression( 1037 exp.CheckColumnConstraint, 1038 this=self._parse_wrapped(self._parse_assignment), 1039 enforced=self._match_text_seq("ENFORCED"), 1040 ), 1041 "COLLATE": lambda self: self.expression( 1042 exp.CollateColumnConstraint, 1043 this=self._parse_identifier() or self._parse_column(), 1044 ), 1045 "COMMENT": lambda self: self.expression( 1046 exp.CommentColumnConstraint, this=self._parse_string() 1047 ), 1048 "COMPRESS": lambda self: self._parse_compress(), 1049 "CLUSTERED": lambda self: self.expression( 1050 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1051 ), 1052 "NONCLUSTERED": lambda self: self.expression( 1053 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1054 ), 1055 "DEFAULT": lambda self: self.expression( 1056 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1057 ), 1058 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1059 "EPHEMERAL": lambda self: self.expression( 1060 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1061 ), 1062 "EXCLUDE": lambda self: self.expression( 1063 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1064 ), 1065 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1066 "FORMAT": lambda self: self.expression( 1067 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1068 ), 1069 "GENERATED": lambda self: self._parse_generated_as_identity(), 1070 "IDENTITY": lambda self: self._parse_auto_increment(), 1071 "INLINE": lambda self: self._parse_inline(), 1072 "LIKE": lambda self: self._parse_create_like(), 1073 "NOT": lambda self: self._parse_not_constraint(), 1074 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1075 "ON": lambda self: ( 1076 self._match(TokenType.UPDATE) 1077 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1078 ) 1079 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1080 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1081 "PERIOD": lambda self: self._parse_period_for_system_time(), 1082 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1083 "REFERENCES": lambda self: self._parse_references(match=False), 1084 "TITLE": lambda self: self.expression( 1085 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1086 ), 1087 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1088 
"UNIQUE": lambda self: self._parse_unique(), 1089 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1090 "WATERMARK": lambda self: self.expression( 1091 exp.WatermarkColumnConstraint, 1092 this=self._match(TokenType.FOR) and self._parse_column(), 1093 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1094 ), 1095 "WITH": lambda self: self.expression( 1096 exp.Properties, expressions=self._parse_wrapped_properties() 1097 ), 1098 } 1099 1100 ALTER_PARSERS = { 1101 "ADD": lambda self: self._parse_alter_table_add(), 1102 "AS": lambda self: self._parse_select(), 1103 "ALTER": lambda self: self._parse_alter_table_alter(), 1104 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1105 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1106 "DROP": lambda self: self._parse_alter_table_drop(), 1107 "RENAME": lambda self: self._parse_alter_table_rename(), 1108 "SET": lambda self: self._parse_alter_table_set(), 1109 "SWAP": lambda self: self.expression( 1110 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1111 ), 1112 } 1113 1114 ALTER_ALTER_PARSERS = { 1115 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1116 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1117 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1118 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1119 } 1120 1121 SCHEMA_UNNAMED_CONSTRAINTS = { 1122 "CHECK", 1123 "EXCLUDE", 1124 "FOREIGN KEY", 1125 "LIKE", 1126 "PERIOD", 1127 "PRIMARY KEY", 1128 "UNIQUE", 1129 "WATERMARK", 1130 } 1131 1132 NO_PAREN_FUNCTION_PARSERS = { 1133 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1134 "CASE": lambda self: self._parse_case(), 1135 "CONNECT_BY_ROOT": lambda self: self.expression( 1136 exp.ConnectByRoot, this=self._parse_column() 1137 ), 1138 "IF": lambda self: self._parse_if(), 1139 } 1140 1141 INVALID_FUNC_NAME_TOKENS = { 1142 TokenType.IDENTIFIER, 1143 TokenType.STRING, 1144 } 1145 1146 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1147 1148 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1149 1150 FUNCTION_PARSERS = { 1151 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1152 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1153 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1154 "DECODE": lambda self: self._parse_decode(), 1155 "EXTRACT": lambda self: self._parse_extract(), 1156 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1157 "GAP_FILL": lambda self: self._parse_gap_fill(), 1158 "JSON_OBJECT": lambda self: self._parse_json_object(), 1159 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1160 "JSON_TABLE": lambda self: self._parse_json_table(), 1161 "MATCH": lambda self: self._parse_match_against(), 1162 "NORMALIZE": lambda self: self._parse_normalize(), 1163 "OPENJSON": lambda self: self._parse_open_json(), 1164 "OVERLAY": lambda self: self._parse_overlay(), 1165 "POSITION": lambda self: self._parse_position(), 1166 "PREDICT": lambda self: self._parse_predict(), 1167 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1168 "STRING_AGG": lambda self: self._parse_string_agg(), 1169 "SUBSTRING": lambda self: self._parse_substring(), 1170 "TRIM": lambda self: self._parse_trim(), 1171 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1172 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1173 "XMLELEMENT": lambda self: self.expression( 1174 
exp.XMLElement, 1175 this=self._match_text_seq("NAME") and self._parse_id_var(), 1176 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1177 ), 1178 "XMLTABLE": lambda self: self._parse_xml_table(), 1179 } 1180 1181 QUERY_MODIFIER_PARSERS = { 1182 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1183 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1184 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1185 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1186 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1187 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1188 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1189 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1190 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1191 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1192 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1193 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1194 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1195 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1196 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1197 TokenType.CLUSTER_BY: lambda self: ( 1198 "cluster", 1199 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1200 ), 1201 TokenType.DISTRIBUTE_BY: lambda self: ( 1202 "distribute", 1203 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1204 ), 1205 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1206 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1207 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1208 } 1209 1210 SET_PARSERS = { 1211 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1212 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1213 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1214 "TRANSACTION": lambda self: self._parse_set_transaction(), 1215 } 1216 1217 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1218 1219 TYPE_LITERAL_PARSERS = { 1220 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1221 } 1222 1223 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1224 1225 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1226 1227 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1228 1229 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1230 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1231 "ISOLATION": ( 1232 ("LEVEL", "REPEATABLE", "READ"), 1233 ("LEVEL", "READ", "COMMITTED"), 1234 ("LEVEL", "READ", "UNCOMMITTED"), 1235 ("LEVEL", "SERIALIZABLE"), 1236 ), 1237 "READ": ("WRITE", "ONLY"), 1238 } 1239 1240 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1241 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1242 ) 1243 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1244 1245 CREATE_SEQUENCE: OPTIONS_TYPE = { 1246 "SCALE": ("EXTEND", "NOEXTEND"), 1247 "SHARD": ("EXTEND", "NOEXTEND"), 1248 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1249 **dict.fromkeys( 1250 ( 1251 "SESSION", 1252 "GLOBAL", 1253 "KEEP", 1254 "NOKEEP", 1255 "ORDER", 1256 "NOORDER", 1257 "NOCACHE", 1258 "CYCLE", 1259
"NOCYCLE", 1260 "NOMINVALUE", 1261 "NOMAXVALUE", 1262 "NOSCALE", 1263 "NOSHARD", 1264 ), 1265 tuple(), 1266 ), 1267 } 1268 1269 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1270 1271 USABLES: OPTIONS_TYPE = dict.fromkeys( 1272 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1273 ) 1274 1275 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1276 1277 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1278 "TYPE": ("EVOLUTION",), 1279 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1280 } 1281 1282 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1283 1284 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1285 1286 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1287 "NOT": ("ENFORCED",), 1288 "MATCH": ( 1289 "FULL", 1290 "PARTIAL", 1291 "SIMPLE", 1292 ), 1293 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1294 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1295 } 1296 1297 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1298 1299 CLONE_KEYWORDS = {"CLONE", "COPY"} 1300 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1301 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1302 1303 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1304 1305 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1306 1307 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1308 1309 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1310 1311 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1312 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1313 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1314 1315 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1316 1317 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1318 1319 ADD_CONSTRAINT_TOKENS = { 1320 TokenType.CONSTRAINT, 1321 TokenType.FOREIGN_KEY, 1322 TokenType.INDEX, 1323 TokenType.KEY, 1324 TokenType.PRIMARY_KEY, 1325 TokenType.UNIQUE, 1326 } 1327 1328 DISTINCT_TOKENS = {TokenType.DISTINCT} 1329 1330 NULL_TOKENS = {TokenType.NULL} 1331 1332 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1333 1334 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1335 1336 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1337 1338 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1339 1340 ODBC_DATETIME_LITERALS = { 1341 "d": exp.Date, 1342 "t": exp.Time, 1343 "ts": exp.Timestamp, 1344 } 1345 1346 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1347 1348 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1349 1350 # The style options for the DESCRIBE statement 1351 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1352 1353 # The style options for the ANALYZE statement 1354 ANALYZE_STYLES = { 1355 "BUFFER_USAGE_LIMIT", 1356 "FULL", 1357 "LOCAL", 1358 "NO_WRITE_TO_BINLOG", 1359 "SAMPLE", 1360 "SKIP_LOCKED", 1361 "VERBOSE", 1362 } 1363 1364 ANALYZE_EXPRESSION_PARSERS = { 1365 "ALL": lambda self: self._parse_analyze_columns(), 1366 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1367 "DELETE": lambda self: self._parse_analyze_delete(), 1368 "DROP": lambda self: self._parse_analyze_histogram(), 1369 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1370 "LIST": lambda self: self._parse_analyze_list(), 1371 "PREDICATE": lambda self: 
self._parse_analyze_columns(), 1372 "UPDATE": lambda self: self._parse_analyze_histogram(), 1373 "VALIDATE": lambda self: self._parse_analyze_validate(), 1374 } 1375 1376 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1377 1378 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1379 1380 OPERATION_MODIFIERS: t.Set[str] = set() 1381 1382 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1383 1384 STRICT_CAST = True 1385 1386 PREFIXED_PIVOT_COLUMNS = False 1387 IDENTIFY_PIVOT_STRINGS = False 1388 1389 LOG_DEFAULTS_TO_LN = False 1390 1391 # Whether ADD is present for each column added by ALTER TABLE 1392 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1393 1394 # Whether the table sample clause expects CSV syntax 1395 TABLESAMPLE_CSV = False 1396 1397 # The default method used for table sampling 1398 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1399 1400 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1401 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1402 1403 # Whether the TRIM function expects the characters to trim as its first argument 1404 TRIM_PATTERN_FIRST = False 1405 1406 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1407 STRING_ALIASES = False 1408 1409 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1410 MODIFIERS_ATTACHED_TO_SET_OP = True 1411 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1412 1413 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1414 NO_PAREN_IF_COMMANDS = True 1415 1416 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1417 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1418 1419 # Whether the `:` operator is used to extract a value from a VARIANT column 1420 COLON_IS_VARIANT_EXTRACT = False 1421 1422 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1423 # If this is True and '(' is not found, the keyword will be treated as an identifier 1424 VALUES_FOLLOWED_BY_PAREN = True 1425 1426 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1427 SUPPORTS_IMPLICIT_UNNEST = False 1428 1429 # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH 1430 INTERVAL_SPANS = True 1431 1432 # Whether a PARTITION clause can follow a table reference 1433 SUPPORTS_PARTITION_SELECTION = False 1434 1435 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1436 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1437 1438 # Whether the 'AS' keyword is optional in the CTE definition syntax 1439 OPTIONAL_ALIAS_TOKEN_CTE = True 1440 1441 __slots__ = ( 1442 "error_level", 1443 "error_message_context", 1444 "max_errors", 1445 "dialect", 1446 "sql", 1447 "errors", 1448 "_tokens", 1449 "_index", 1450 "_curr", 1451 "_next", 1452 "_prev", 1453 "_prev_comments", 1454 ) 1455 1456 # Autofilled 1457 SHOW_TRIE: t.Dict = {} 1458 SET_TRIE: t.Dict = {} 1459 1460 def __init__( 1461 self, 1462 error_level: t.Optional[ErrorLevel] = None, 1463 error_message_context: int = 100, 1464 max_errors: int = 3, 1465 dialect: DialectType = None, 1466 ): 1467 from sqlglot.dialects import Dialect 1468 1469 self.error_level = error_level or ErrorLevel.IMMEDIATE 1470 self.error_message_context = error_message_context 1471 self.max_errors = max_errors 1472 self.dialect = Dialect.get_or_raise(dialect) 1473 self.reset() 1474 1475 def reset(self): 1476 self.sql = "" 1477 self.errors = [] 1478 self._tokens = [] 1479 self._index = 0 1480 self._curr = None 1481 self._next = None 1482 self._prev = None 1483 self._prev_comments = None 1484 1485 def parse( 1486 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1487 ) -> t.List[t.Optional[exp.Expression]]: 1488 """ 1489 Parses a list of tokens and returns a list of syntax trees, one tree 1490 per parsed SQL statement. 1491 1492 Args: 1493 raw_tokens: The list of tokens. 1494 sql: The original SQL string, used to produce helpful debug messages. 1495 1496 Returns: 1497 The list of the produced syntax trees. 1498 """ 1499 return self._parse( 1500 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1501 ) 1502 1503 def parse_into( 1504 self, 1505 expression_types: exp.IntoType, 1506 raw_tokens: t.List[Token], 1507 sql: t.Optional[str] = None, 1508 ) -> t.List[t.Optional[exp.Expression]]: 1509 """ 1510 Parses a list of tokens into a given Expression type. If a collection of Expression 1511 types is given instead, this method will try to parse the token list into each one 1512 of them, stopping at the first for which the parsing succeeds. 1513 1514 Args: 1515 expression_types: The expression type(s) to try and parse the token list into. 1516 raw_tokens: The list of tokens. 1517 sql: The original SQL string, used to produce helpful debug messages. 1518 1519 Returns: 1520 The target Expression.
1521 """ 1522 errors = [] 1523 for expression_type in ensure_list(expression_types): 1524 parser = self.EXPRESSION_PARSERS.get(expression_type) 1525 if not parser: 1526 raise TypeError(f"No parser registered for {expression_type}") 1527 1528 try: 1529 return self._parse(parser, raw_tokens, sql) 1530 except ParseError as e: 1531 e.errors[0]["into_expression"] = expression_type 1532 errors.append(e) 1533 1534 raise ParseError( 1535 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1536 errors=merge_errors(errors), 1537 ) from errors[-1] 1538 1539 def _parse( 1540 self, 1541 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1542 raw_tokens: t.List[Token], 1543 sql: t.Optional[str] = None, 1544 ) -> t.List[t.Optional[exp.Expression]]: 1545 self.reset() 1546 self.sql = sql or "" 1547 1548 total = len(raw_tokens) 1549 chunks: t.List[t.List[Token]] = [[]] 1550 1551 for i, token in enumerate(raw_tokens): 1552 if token.token_type == TokenType.SEMICOLON: 1553 if token.comments: 1554 chunks.append([token]) 1555 1556 if i < total - 1: 1557 chunks.append([]) 1558 else: 1559 chunks[-1].append(token) 1560 1561 expressions = [] 1562 1563 for tokens in chunks: 1564 self._index = -1 1565 self._tokens = tokens 1566 self._advance() 1567 1568 expressions.append(parse_method(self)) 1569 1570 if self._index < len(self._tokens): 1571 self.raise_error("Invalid expression / Unexpected token") 1572 1573 self.check_errors() 1574 1575 return expressions 1576 1577 def check_errors(self) -> None: 1578 """Logs or raises any found errors, depending on the chosen error level setting.""" 1579 if self.error_level == ErrorLevel.WARN: 1580 for error in self.errors: 1581 logger.error(str(error)) 1582 elif self.error_level == ErrorLevel.RAISE and self.errors: 1583 raise ParseError( 1584 concat_messages(self.errors, self.max_errors), 1585 errors=merge_errors(self.errors), 1586 ) 1587 1588 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1589 """ 1590 Appends an error in the list of recorded errors or raises it, depending on the chosen 1591 error level setting. 1592 """ 1593 token = token or self._curr or self._prev or Token.string("") 1594 start = token.start 1595 end = token.end + 1 1596 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1597 highlight = self.sql[start:end] 1598 end_context = self.sql[end : end + self.error_message_context] 1599 1600 error = ParseError.new( 1601 f"{message}. Line {token.line}, Col: {token.col}.\n" 1602 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1603 description=message, 1604 line=token.line, 1605 col=token.col, 1606 start_context=start_context, 1607 highlight=highlight, 1608 end_context=end_context, 1609 ) 1610 1611 if self.error_level == ErrorLevel.IMMEDIATE: 1612 raise error 1613 1614 self.errors.append(error) 1615 1616 def expression( 1617 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1618 ) -> E: 1619 """ 1620 Creates a new, validated Expression. 1621 1622 Args: 1623 exp_class: The expression class to instantiate. 1624 comments: An optional list of comments to attach to the expression. 1625 kwargs: The arguments to set for the expression along with their respective values. 1626 1627 Returns: 1628 The target expression. 
1629 """ 1630 instance = exp_class(**kwargs) 1631 instance.add_comments(comments) if comments else self._add_comments(instance) 1632 return self.validate_expression(instance) 1633 1634 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1635 if expression and self._prev_comments: 1636 expression.add_comments(self._prev_comments) 1637 self._prev_comments = None 1638 1639 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1640 """ 1641 Validates an Expression, making sure that all its mandatory arguments are set. 1642 1643 Args: 1644 expression: The expression to validate. 1645 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1646 1647 Returns: 1648 The validated expression. 1649 """ 1650 if self.error_level != ErrorLevel.IGNORE: 1651 for error_message in expression.error_messages(args): 1652 self.raise_error(error_message) 1653 1654 return expression 1655 1656 def _find_sql(self, start: Token, end: Token) -> str: 1657 return self.sql[start.start : end.end + 1] 1658 1659 def _is_connected(self) -> bool: 1660 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1661 1662 def _advance(self, times: int = 1) -> None: 1663 self._index += times 1664 self._curr = seq_get(self._tokens, self._index) 1665 self._next = seq_get(self._tokens, self._index + 1) 1666 1667 if self._index > 0: 1668 self._prev = self._tokens[self._index - 1] 1669 self._prev_comments = self._prev.comments 1670 else: 1671 self._prev = None 1672 self._prev_comments = None 1673 1674 def _retreat(self, index: int) -> None: 1675 if index != self._index: 1676 self._advance(index - self._index) 1677 1678 def _warn_unsupported(self) -> None: 1679 if len(self._tokens) <= 1: 1680 return 1681 1682 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1683 # interested in emitting a warning for the one being currently processed. 1684 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1685 1686 logger.warning( 1687 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1688 ) 1689 1690 def _parse_command(self) -> exp.Command: 1691 self._warn_unsupported() 1692 return self.expression( 1693 exp.Command, 1694 comments=self._prev_comments, 1695 this=self._prev.text.upper(), 1696 expression=self._parse_string(), 1697 ) 1698 1699 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1700 """ 1701 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1702 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1703 solve this by setting & resetting the parser state accordingly 1704 """ 1705 index = self._index 1706 error_level = self.error_level 1707 1708 self.error_level = ErrorLevel.IMMEDIATE 1709 try: 1710 this = parse_method() 1711 except ParseError: 1712 this = None 1713 finally: 1714 if not this or retreat: 1715 self._retreat(index) 1716 self.error_level = error_level 1717 1718 return this 1719 1720 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1721 start = self._prev 1722 exists = self._parse_exists() if allow_exists else None 1723 1724 self._match(TokenType.ON) 1725 1726 materialized = self._match_text_seq("MATERIALIZED") 1727 kind = self._match_set(self.CREATABLES) and self._prev 1728 if not kind: 1729 return self._parse_as_command(start) 1730 1731 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1732 this = self._parse_user_defined_function(kind=kind.token_type) 1733 elif kind.token_type == TokenType.TABLE: 1734 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1735 elif kind.token_type == TokenType.COLUMN: 1736 this = self._parse_column() 1737 else: 1738 this = self._parse_id_var() 1739 1740 self._match(TokenType.IS) 1741 1742 return self.expression( 1743 exp.Comment, 1744 this=this, 1745 kind=kind.text, 1746 expression=self._parse_string(), 1747 exists=exists, 1748 materialized=materialized, 1749 ) 1750 1751 def _parse_to_table( 1752 self, 1753 ) -> exp.ToTableProperty: 1754 table = self._parse_table_parts(schema=True) 1755 return self.expression(exp.ToTableProperty, this=table) 1756 1757 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1758 def _parse_ttl(self) -> exp.Expression: 1759 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1760 this = self._parse_bitwise() 1761 1762 if self._match_text_seq("DELETE"): 1763 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1764 if self._match_text_seq("RECOMPRESS"): 1765 return self.expression( 1766 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1767 ) 1768 if self._match_text_seq("TO", "DISK"): 1769 return self.expression( 1770 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1771 ) 1772 if self._match_text_seq("TO", "VOLUME"): 1773 return self.expression( 1774 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1775 ) 1776 1777 return this 1778 1779 expressions = self._parse_csv(_parse_ttl_action) 1780 where = self._parse_where() 1781 group = self._parse_group() 1782 1783 aggregates = None 1784 if group and self._match(TokenType.SET): 1785 aggregates = self._parse_csv(self._parse_set_item) 1786 1787 return self.expression( 1788 exp.MergeTreeTTL, 1789 expressions=expressions, 1790 where=where, 1791 group=group, 1792 aggregates=aggregates, 1793 ) 1794 1795 def _parse_statement(self) -> t.Optional[exp.Expression]: 1796 if self._curr is None: 1797 return None 1798 1799 if self._match_set(self.STATEMENT_PARSERS): 1800 comments = self._prev_comments 1801 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1802 stmt.add_comments(comments, prepend=True) 1803 return stmt 1804 1805 if self._match_set(self.dialect.tokenizer.COMMANDS): 1806 return self._parse_command() 1807 1808 expression = self._parse_expression() 1809 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1810 return self._parse_query_modifiers(expression) 
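# A minimal usage sketch of the entry points above (parse, parse_into, and the
# ErrorLevel handling in raise_error/check_errors). The SQL strings are
# arbitrary examples; the classes and methods are the ones defined in this
# module, so this is illustrative rather than part of the original source:
#
#     from sqlglot import exp
#     from sqlglot.errors import ErrorLevel
#     from sqlglot.parser import Parser
#     from sqlglot.tokens import Tokenizer
#
#     sql = "SELECT a FROM t; SELECT b FROM u"
#     tokens = Tokenizer().tokenize(sql)
#
#     # parse() returns one syntax tree per semicolon-separated statement
#     parser = Parser(error_level=ErrorLevel.RAISE)
#     trees = parser.parse(tokens, sql)
#     assert len(trees) == 2 and isinstance(trees[0], exp.Select)
#
#     # parse_into() dispatches through EXPRESSION_PARSERS for the target type
#     where_sql = "WHERE x > 1"
#     where = Parser().parse_into(exp.Where, Tokenizer().tokenize(where_sql), where_sql)[0]
#
#     # With ErrorLevel.WARN, errors are logged and collected on parser.errors
#     # instead of being raised as soon as they are encountered
#     lenient = Parser(error_level=ErrorLevel.WARN)
#     lenient.parse(Tokenizer().tokenize("SELECT 1 +"), "SELECT 1 +")
#     assert lenient.errors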
1811 1812 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1813 start = self._prev 1814 temporary = self._match(TokenType.TEMPORARY) 1815 materialized = self._match_text_seq("MATERIALIZED") 1816 1817 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1818 if not kind: 1819 return self._parse_as_command(start) 1820 1821 concurrently = self._match_text_seq("CONCURRENTLY") 1822 if_exists = exists or self._parse_exists() 1823 1824 if kind == "COLUMN": 1825 this = self._parse_column() 1826 else: 1827 this = self._parse_table_parts( 1828 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1829 ) 1830 1831 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1832 1833 if self._match(TokenType.L_PAREN, advance=False): 1834 expressions = self._parse_wrapped_csv(self._parse_types) 1835 else: 1836 expressions = None 1837 1838 return self.expression( 1839 exp.Drop, 1840 exists=if_exists, 1841 this=this, 1842 expressions=expressions, 1843 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1844 temporary=temporary, 1845 materialized=materialized, 1846 cascade=self._match_text_seq("CASCADE"), 1847 constraints=self._match_text_seq("CONSTRAINTS"), 1848 purge=self._match_text_seq("PURGE"), 1849 cluster=cluster, 1850 concurrently=concurrently, 1851 ) 1852 1853 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1854 return ( 1855 self._match_text_seq("IF") 1856 and (not not_ or self._match(TokenType.NOT)) 1857 and self._match(TokenType.EXISTS) 1858 ) 1859 1860 def _parse_create(self) -> exp.Create | exp.Command: 1861 # Note: this can't be None because we've matched a statement parser 1862 start = self._prev 1863 1864 replace = ( 1865 start.token_type == TokenType.REPLACE 1866 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1867 or self._match_pair(TokenType.OR, TokenType.ALTER) 1868 ) 1869 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1870 1871 unique = self._match(TokenType.UNIQUE) 1872 1873 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1874 clustered = True 1875 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1876 "COLUMNSTORE" 1877 ): 1878 clustered = False 1879 else: 1880 clustered = None 1881 1882 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1883 self._advance() 1884 1885 properties = None 1886 create_token = self._match_set(self.CREATABLES) and self._prev 1887 1888 if not create_token: 1889 # exp.Properties.Location.POST_CREATE 1890 properties = self._parse_properties() 1891 create_token = self._match_set(self.CREATABLES) and self._prev 1892 1893 if not properties or not create_token: 1894 return self._parse_as_command(start) 1895 1896 concurrently = self._match_text_seq("CONCURRENTLY") 1897 exists = self._parse_exists(not_=True) 1898 this = None 1899 expression: t.Optional[exp.Expression] = None 1900 indexes = None 1901 no_schema_binding = None 1902 begin = None 1903 end = None 1904 clone = None 1905 1906 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1907 nonlocal properties 1908 if properties and temp_props: 1909 properties.expressions.extend(temp_props.expressions) 1910 elif temp_props: 1911 properties = temp_props 1912 1913 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1914 this = self._parse_user_defined_function(kind=create_token.token_type) 1915 1916 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1917 
extend_props(self._parse_properties()) 1918 1919 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1920 extend_props(self._parse_properties()) 1921 1922 if not expression: 1923 if self._match(TokenType.COMMAND): 1924 expression = self._parse_as_command(self._prev) 1925 else: 1926 begin = self._match(TokenType.BEGIN) 1927 return_ = self._match_text_seq("RETURN") 1928 1929 if self._match(TokenType.STRING, advance=False): 1930 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1931 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1932 expression = self._parse_string() 1933 extend_props(self._parse_properties()) 1934 else: 1935 expression = self._parse_user_defined_function_expression() 1936 1937 end = self._match_text_seq("END") 1938 1939 if return_: 1940 expression = self.expression(exp.Return, this=expression) 1941 elif create_token.token_type == TokenType.INDEX: 1942 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1943 if not self._match(TokenType.ON): 1944 index = self._parse_id_var() 1945 anonymous = False 1946 else: 1947 index = None 1948 anonymous = True 1949 1950 this = self._parse_index(index=index, anonymous=anonymous) 1951 elif create_token.token_type in self.DB_CREATABLES: 1952 table_parts = self._parse_table_parts( 1953 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1954 ) 1955 1956 # exp.Properties.Location.POST_NAME 1957 self._match(TokenType.COMMA) 1958 extend_props(self._parse_properties(before=True)) 1959 1960 this = self._parse_schema(this=table_parts) 1961 1962 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1963 extend_props(self._parse_properties()) 1964 1965 self._match(TokenType.ALIAS) 1966 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1967 # exp.Properties.Location.POST_ALIAS 1968 extend_props(self._parse_properties()) 1969 1970 if create_token.token_type == TokenType.SEQUENCE: 1971 expression = self._parse_types() 1972 extend_props(self._parse_properties()) 1973 else: 1974 expression = self._parse_ddl_select() 1975 1976 if create_token.token_type == TokenType.TABLE: 1977 # exp.Properties.Location.POST_EXPRESSION 1978 extend_props(self._parse_properties()) 1979 1980 indexes = [] 1981 while True: 1982 index = self._parse_index() 1983 1984 # exp.Properties.Location.POST_INDEX 1985 extend_props(self._parse_properties()) 1986 if not index: 1987 break 1988 else: 1989 self._match(TokenType.COMMA) 1990 indexes.append(index) 1991 elif create_token.token_type == TokenType.VIEW: 1992 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1993 no_schema_binding = True 1994 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1995 extend_props(self._parse_properties()) 1996 1997 shallow = self._match_text_seq("SHALLOW") 1998 1999 if self._match_texts(self.CLONE_KEYWORDS): 2000 copy = self._prev.text.lower() == "copy" 2001 clone = self.expression( 2002 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2003 ) 2004 2005 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2006 return self._parse_as_command(start) 2007 2008 create_kind_text = create_token.text.upper() 2009 return self.expression( 2010 exp.Create, 2011 this=this, 2012 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2013 replace=replace, 2014 refresh=refresh, 2015 unique=unique, 2016 expression=expression, 
2017 exists=exists, 2018 properties=properties, 2019 indexes=indexes, 2020 no_schema_binding=no_schema_binding, 2021 begin=begin, 2022 end=end, 2023 clone=clone, 2024 concurrently=concurrently, 2025 clustered=clustered, 2026 ) 2027 2028 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2029 seq = exp.SequenceProperties() 2030 2031 options = [] 2032 index = self._index 2033 2034 while self._curr: 2035 self._match(TokenType.COMMA) 2036 if self._match_text_seq("INCREMENT"): 2037 self._match_text_seq("BY") 2038 self._match_text_seq("=") 2039 seq.set("increment", self._parse_term()) 2040 elif self._match_text_seq("MINVALUE"): 2041 seq.set("minvalue", self._parse_term()) 2042 elif self._match_text_seq("MAXVALUE"): 2043 seq.set("maxvalue", self._parse_term()) 2044 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2045 self._match_text_seq("=") 2046 seq.set("start", self._parse_term()) 2047 elif self._match_text_seq("CACHE"): 2048 # T-SQL allows empty CACHE which is initialized dynamically 2049 seq.set("cache", self._parse_number() or True) 2050 elif self._match_text_seq("OWNED", "BY"): 2051 # "OWNED BY NONE" is the default 2052 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2053 else: 2054 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2055 if opt: 2056 options.append(opt) 2057 else: 2058 break 2059 2060 seq.set("options", options if options else None) 2061 return None if self._index == index else seq 2062 2063 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2064 # only used for teradata currently 2065 self._match(TokenType.COMMA) 2066 2067 kwargs = { 2068 "no": self._match_text_seq("NO"), 2069 "dual": self._match_text_seq("DUAL"), 2070 "before": self._match_text_seq("BEFORE"), 2071 "default": self._match_text_seq("DEFAULT"), 2072 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2073 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2074 "after": self._match_text_seq("AFTER"), 2075 "minimum": self._match_texts(("MIN", "MINIMUM")), 2076 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2077 } 2078 2079 if self._match_texts(self.PROPERTY_PARSERS): 2080 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2081 try: 2082 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2083 except TypeError: 2084 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2085 2086 return None 2087 2088 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2089 return self._parse_wrapped_csv(self._parse_property) 2090 2091 def _parse_property(self) -> t.Optional[exp.Expression]: 2092 if self._match_texts(self.PROPERTY_PARSERS): 2093 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2094 2095 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2096 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2097 2098 if self._match_text_seq("COMPOUND", "SORTKEY"): 2099 return self._parse_sortkey(compound=True) 2100 2101 if self._match_text_seq("SQL", "SECURITY"): 2102 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2103 2104 index = self._index 2105 key = self._parse_column() 2106 2107 if not self._match(TokenType.EQ): 2108 self._retreat(index) 2109 return self._parse_sequence_properties() 2110 2111 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2112 if isinstance(key, exp.Column): 2113 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2114 2115 value = self._parse_bitwise() or self._parse_var(any_token=True) 2116 2117 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2118 if isinstance(value, exp.Column): 2119 value = exp.var(value.name) 2120 2121 return self.expression(exp.Property, this=key, value=value) 2122 2123 def _parse_stored(self) -> exp.FileFormatProperty: 2124 self._match(TokenType.ALIAS) 2125 2126 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2127 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2128 2129 return self.expression( 2130 exp.FileFormatProperty, 2131 this=( 2132 self.expression( 2133 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2134 ) 2135 if input_format or output_format 2136 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2137 ), 2138 ) 2139 2140 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2141 field = self._parse_field() 2142 if isinstance(field, exp.Identifier) and not field.quoted: 2143 field = exp.var(field) 2144 2145 return field 2146 2147 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2148 self._match(TokenType.EQ) 2149 self._match(TokenType.ALIAS) 2150 2151 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2152 2153 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2154 properties = [] 2155 while True: 2156 if before: 2157 prop = self._parse_property_before() 2158 else: 2159 prop = self._parse_property() 2160 if not prop: 2161 break 2162 for p in ensure_list(prop): 2163 properties.append(p) 2164 2165 if properties: 2166 return self.expression(exp.Properties, expressions=properties) 2167 2168 return None 2169 2170 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2171 return self.expression( 2172 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2173 ) 2174 2175 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2176 if self._match_texts(("DEFINER", "INVOKER")): 2177 security_specifier = self._prev.text.upper() 2178 return self.expression(exp.SecurityProperty, this=security_specifier) 2179 return None 2180 2181 def _parse_settings_property(self) -> exp.SettingsProperty: 2182 return self.expression( 2183 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2184 ) 2185 2186 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2187 if self._index >= 2: 2188 pre_volatile_token = self._tokens[self._index - 2] 2189 else: 2190 pre_volatile_token = None 2191 2192 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2193 return exp.VolatileProperty() 2194 2195 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2196 2197 def _parse_retention_period(self) -> exp.Var: 2198 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2199 number = self._parse_number() 2200 number_str = f"{number} " if number else "" 2201 unit = self._parse_var(any_token=True) 2202 return exp.var(f"{number_str}{unit}") 2203 2204 def _parse_system_versioning_property( 2205 self, with_: bool = False 2206 ) -> exp.WithSystemVersioningProperty: 2207 self._match(TokenType.EQ) 2208 prop = self.expression( 2209 exp.WithSystemVersioningProperty, 2210 **{ # type: ignore 2211 "on": 
True, 2212 "with": with_, 2213 }, 2214 ) 2215 2216 if self._match_text_seq("OFF"): 2217 prop.set("on", False) 2218 return prop 2219 2220 self._match(TokenType.ON) 2221 if self._match(TokenType.L_PAREN): 2222 while self._curr and not self._match(TokenType.R_PAREN): 2223 if self._match_text_seq("HISTORY_TABLE", "="): 2224 prop.set("this", self._parse_table_parts()) 2225 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2226 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2227 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2228 prop.set("retention_period", self._parse_retention_period()) 2229 2230 self._match(TokenType.COMMA) 2231 2232 return prop 2233 2234 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2235 self._match(TokenType.EQ) 2236 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2237 prop = self.expression(exp.DataDeletionProperty, on=on) 2238 2239 if self._match(TokenType.L_PAREN): 2240 while self._curr and not self._match(TokenType.R_PAREN): 2241 if self._match_text_seq("FILTER_COLUMN", "="): 2242 prop.set("filter_column", self._parse_column()) 2243 elif self._match_text_seq("RETENTION_PERIOD", "="): 2244 prop.set("retention_period", self._parse_retention_period()) 2245 2246 self._match(TokenType.COMMA) 2247 2248 return prop 2249 2250 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2251 kind = "HASH" 2252 expressions: t.Optional[t.List[exp.Expression]] = None 2253 if self._match_text_seq("BY", "HASH"): 2254 expressions = self._parse_wrapped_csv(self._parse_id_var) 2255 elif self._match_text_seq("BY", "RANDOM"): 2256 kind = "RANDOM" 2257 2258 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2259 buckets: t.Optional[exp.Expression] = None 2260 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2261 buckets = self._parse_number() 2262 2263 return self.expression( 2264 exp.DistributedByProperty, 2265 expressions=expressions, 2266 kind=kind, 2267 buckets=buckets, 2268 order=self._parse_order(), 2269 ) 2270 2271 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2272 self._match_text_seq("KEY") 2273 expressions = self._parse_wrapped_id_vars() 2274 return self.expression(expr_type, expressions=expressions) 2275 2276 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2277 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2278 prop = self._parse_system_versioning_property(with_=True) 2279 self._match_r_paren() 2280 return prop 2281 2282 if self._match(TokenType.L_PAREN, advance=False): 2283 return self._parse_wrapped_properties() 2284 2285 if self._match_text_seq("JOURNAL"): 2286 return self._parse_withjournaltable() 2287 2288 if self._match_texts(self.VIEW_ATTRIBUTES): 2289 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2290 2291 if self._match_text_seq("DATA"): 2292 return self._parse_withdata(no=False) 2293 elif self._match_text_seq("NO", "DATA"): 2294 return self._parse_withdata(no=True) 2295 2296 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2297 return self._parse_serde_properties(with_=True) 2298 2299 if self._match(TokenType.SCHEMA): 2300 return self.expression( 2301 exp.WithSchemaBindingProperty, 2302 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2303 ) 2304 2305 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2306 return self.expression( 2307 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2308 ) 2309 2310 if not self._next: 2311 return None 2312 2313 return self._parse_withisolatedloading() 2314 2315 def _parse_procedure_option(self) -> exp.Expression | None: 2316 if self._match_text_seq("EXECUTE", "AS"): 2317 return self.expression( 2318 exp.ExecuteAsProperty, 2319 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2320 or self._parse_string(), 2321 ) 2322 2323 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2324 2325 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2326 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2327 self._match(TokenType.EQ) 2328 2329 user = self._parse_id_var() 2330 self._match(TokenType.PARAMETER) 2331 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2332 2333 if not user or not host: 2334 return None 2335 2336 return exp.DefinerProperty(this=f"{user}@{host}") 2337 2338 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2339 self._match(TokenType.TABLE) 2340 self._match(TokenType.EQ) 2341 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2342 2343 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2344 return self.expression(exp.LogProperty, no=no) 2345 2346 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2347 return self.expression(exp.JournalProperty, **kwargs) 2348 2349 def _parse_checksum(self) -> exp.ChecksumProperty: 2350 self._match(TokenType.EQ) 2351 2352 on = None 2353 if self._match(TokenType.ON): 2354 on = True 2355 elif self._match_text_seq("OFF"): 2356 on = False 2357 2358 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2359 2360 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2361 return self.expression( 2362 exp.Cluster, 2363 expressions=( 2364 self._parse_wrapped_csv(self._parse_ordered) 2365 if wrapped 2366 else self._parse_csv(self._parse_ordered) 2367 ), 2368 ) 2369 2370 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2371 self._match_text_seq("BY") 2372 2373 self._match_l_paren() 2374 expressions = self._parse_csv(self._parse_column) 2375 self._match_r_paren() 2376 2377 if self._match_text_seq("SORTED", "BY"): 2378 self._match_l_paren() 2379 sorted_by = self._parse_csv(self._parse_ordered) 2380 self._match_r_paren() 2381 else: 2382 sorted_by = None 2383 2384 self._match(TokenType.INTO) 2385 buckets = self._parse_number() 2386 self._match_text_seq("BUCKETS") 2387 2388 return self.expression( 2389 exp.ClusteredByProperty, 2390 expressions=expressions, 2391 sorted_by=sorted_by, 2392 buckets=buckets, 2393 ) 2394 2395 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2396 if not self._match_text_seq("GRANTS"): 2397 self._retreat(self._index - 1) 2398 return None 2399 2400 return self.expression(exp.CopyGrantsProperty) 2401 2402 def _parse_freespace(self) -> exp.FreespaceProperty: 2403 self._match(TokenType.EQ) 2404 return self.expression( 2405 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2406 ) 2407 2408 def _parse_mergeblockratio( 2409 self, no: bool = False, default: bool = False 2410 ) -> exp.MergeBlockRatioProperty: 2411 if self._match(TokenType.EQ): 2412 return self.expression( 2413 exp.MergeBlockRatioProperty, 2414 this=self._parse_number(), 2415 percent=self._match(TokenType.PERCENT), 2416 ) 2417 2418 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2419 2420 def _parse_datablocksize( 2421 self, 2422 default: t.Optional[bool] = None, 2423 minimum: t.Optional[bool] = None, 2424 maximum: t.Optional[bool] = None, 2425 ) -> exp.DataBlocksizeProperty: 2426 self._match(TokenType.EQ) 2427 size = self._parse_number() 2428 2429 units = None 2430 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2431 units = self._prev.text 2432 2433 return self.expression( 2434 exp.DataBlocksizeProperty, 2435 size=size, 2436 units=units, 2437 default=default, 2438 minimum=minimum, 2439 maximum=maximum, 2440 ) 2441 2442 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2443 self._match(TokenType.EQ) 2444 always = self._match_text_seq("ALWAYS") 2445 manual = self._match_text_seq("MANUAL") 2446 never = self._match_text_seq("NEVER") 2447 default = self._match_text_seq("DEFAULT") 2448 2449 autotemp = None 2450 if self._match_text_seq("AUTOTEMP"): 2451 autotemp = self._parse_schema() 2452 2453 return self.expression( 2454 exp.BlockCompressionProperty, 2455 always=always, 2456 manual=manual, 2457 never=never, 2458 default=default, 2459 autotemp=autotemp, 2460 ) 2461 2462 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2463 index = self._index 2464 no = self._match_text_seq("NO") 2465 concurrent = self._match_text_seq("CONCURRENT") 2466 2467 if not self._match_text_seq("ISOLATED", "LOADING"): 2468 self._retreat(index) 2469 return None 2470 2471 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2472 return self.expression( 2473 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2474 ) 2475 2476 def _parse_locking(self) -> exp.LockingProperty: 2477 if self._match(TokenType.TABLE): 2478 kind = "TABLE" 2479 elif self._match(TokenType.VIEW): 2480 kind = "VIEW" 2481 elif self._match(TokenType.ROW): 2482 kind = "ROW" 2483 elif self._match_text_seq("DATABASE"): 2484 kind = "DATABASE" 2485 else: 2486 kind = None 2487 2488 if kind in ("DATABASE", "TABLE", "VIEW"): 2489 this = self._parse_table_parts() 2490 else: 2491 this = None 2492 2493 if self._match(TokenType.FOR): 2494 for_or_in = "FOR" 2495 elif self._match(TokenType.IN): 2496 for_or_in = "IN" 2497 else: 2498 for_or_in = None 2499 2500 if self._match_text_seq("ACCESS"): 2501 lock_type = "ACCESS" 2502 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2503 lock_type = "EXCLUSIVE" 2504 elif self._match_text_seq("SHARE"): 2505 lock_type = "SHARE" 2506 elif self._match_text_seq("READ"): 2507 lock_type = "READ" 2508 elif self._match_text_seq("WRITE"): 2509 lock_type = "WRITE" 2510 elif self._match_text_seq("CHECKSUM"): 2511 lock_type = "CHECKSUM" 2512 else: 2513 lock_type = None 2514 2515 override = self._match_text_seq("OVERRIDE") 2516 2517 return self.expression( 2518 exp.LockingProperty, 2519 this=this, 2520 kind=kind, 2521 for_or_in=for_or_in, 2522 lock_type=lock_type, 2523 override=override, 2524 ) 2525 2526 def _parse_partition_by(self) -> t.List[exp.Expression]: 2527 if self._match(TokenType.PARTITION_BY): 2528 return self._parse_csv(self._parse_assignment) 2529 return [] 2530 2531 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2532 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2533 if self._match_text_seq("MINVALUE"): 2534 return exp.var("MINVALUE") 2535 if self._match_text_seq("MAXVALUE"): 2536 return exp.var("MAXVALUE") 2537 return self._parse_bitwise() 2538 2539 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2540 expression = None 
2541 from_expressions = None 2542 to_expressions = None 2543 2544 if self._match(TokenType.IN): 2545 this = self._parse_wrapped_csv(self._parse_bitwise) 2546 elif self._match(TokenType.FROM): 2547 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2548 self._match_text_seq("TO") 2549 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2550 elif self._match_text_seq("WITH", "(", "MODULUS"): 2551 this = self._parse_number() 2552 self._match_text_seq(",", "REMAINDER") 2553 expression = self._parse_number() 2554 self._match_r_paren() 2555 else: 2556 self.raise_error("Failed to parse partition bound spec.") 2557 2558 return self.expression( 2559 exp.PartitionBoundSpec, 2560 this=this, 2561 expression=expression, 2562 from_expressions=from_expressions, 2563 to_expressions=to_expressions, 2564 ) 2565 2566 # https://www.postgresql.org/docs/current/sql-createtable.html 2567 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2568 if not self._match_text_seq("OF"): 2569 self._retreat(self._index - 1) 2570 return None 2571 2572 this = self._parse_table(schema=True) 2573 2574 if self._match(TokenType.DEFAULT): 2575 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2576 elif self._match_text_seq("FOR", "VALUES"): 2577 expression = self._parse_partition_bound_spec() 2578 else: 2579 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2580 2581 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2582 2583 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2584 self._match(TokenType.EQ) 2585 return self.expression( 2586 exp.PartitionedByProperty, 2587 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2588 ) 2589 2590 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2591 if self._match_text_seq("AND", "STATISTICS"): 2592 statistics = True 2593 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2594 statistics = False 2595 else: 2596 statistics = None 2597 2598 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2599 2600 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2601 if self._match_text_seq("SQL"): 2602 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2603 return None 2604 2605 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2606 if self._match_text_seq("SQL", "DATA"): 2607 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2608 return None 2609 2610 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2611 if self._match_text_seq("PRIMARY", "INDEX"): 2612 return exp.NoPrimaryIndexProperty() 2613 if self._match_text_seq("SQL"): 2614 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2615 return None 2616 2617 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2618 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2619 return exp.OnCommitProperty() 2620 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2621 return exp.OnCommitProperty(delete=True) 2622 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2623 2624 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2625 if self._match_text_seq("SQL", "DATA"): 2626 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2627 return None 2628 2629 def _parse_distkey(self) -> exp.DistKeyProperty: 2630 return self.expression(exp.DistKeyProperty, 
this=self._parse_wrapped(self._parse_id_var)) 2631 2632 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2633 table = self._parse_table(schema=True) 2634 2635 options = [] 2636 while self._match_texts(("INCLUDING", "EXCLUDING")): 2637 this = self._prev.text.upper() 2638 2639 id_var = self._parse_id_var() 2640 if not id_var: 2641 return None 2642 2643 options.append( 2644 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2645 ) 2646 2647 return self.expression(exp.LikeProperty, this=table, expressions=options) 2648 2649 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2650 return self.expression( 2651 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2652 ) 2653 2654 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2655 self._match(TokenType.EQ) 2656 return self.expression( 2657 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2658 ) 2659 2660 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2661 self._match_text_seq("WITH", "CONNECTION") 2662 return self.expression( 2663 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2664 ) 2665 2666 def _parse_returns(self) -> exp.ReturnsProperty: 2667 value: t.Optional[exp.Expression] 2668 null = None 2669 is_table = self._match(TokenType.TABLE) 2670 2671 if is_table: 2672 if self._match(TokenType.LT): 2673 value = self.expression( 2674 exp.Schema, 2675 this="TABLE", 2676 expressions=self._parse_csv(self._parse_struct_types), 2677 ) 2678 if not self._match(TokenType.GT): 2679 self.raise_error("Expecting >") 2680 else: 2681 value = self._parse_schema(exp.var("TABLE")) 2682 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2683 null = True 2684 value = None 2685 else: 2686 value = self._parse_types() 2687 2688 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2689 2690 def _parse_describe(self) -> exp.Describe: 2691 kind = self._match_set(self.CREATABLES) and self._prev.text 2692 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2693 if self._match(TokenType.DOT): 2694 style = None 2695 self._retreat(self._index - 2) 2696 2697 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2698 2699 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2700 this = self._parse_statement() 2701 else: 2702 this = self._parse_table(schema=True) 2703 2704 properties = self._parse_properties() 2705 expressions = properties.expressions if properties else None 2706 partition = self._parse_partition() 2707 return self.expression( 2708 exp.Describe, 2709 this=this, 2710 style=style, 2711 kind=kind, 2712 expressions=expressions, 2713 partition=partition, 2714 format=format, 2715 ) 2716 2717 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2718 kind = self._prev.text.upper() 2719 expressions = [] 2720 2721 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2722 if self._match(TokenType.WHEN): 2723 expression = self._parse_disjunction() 2724 self._match(TokenType.THEN) 2725 else: 2726 expression = None 2727 2728 else_ = self._match(TokenType.ELSE) 2729 2730 if not self._match(TokenType.INTO): 2731 return None 2732 2733 return self.expression( 2734 exp.ConditionalInsert, 2735 this=self.expression( 2736 exp.Insert, 2737 this=self._parse_table(schema=True), 2738 
expression=self._parse_derived_table_values(), 2739 ), 2740 expression=expression, 2741 else_=else_, 2742 ) 2743 2744 expression = parse_conditional_insert() 2745 while expression is not None: 2746 expressions.append(expression) 2747 expression = parse_conditional_insert() 2748 2749 return self.expression( 2750 exp.MultitableInserts, 2751 kind=kind, 2752 comments=comments, 2753 expressions=expressions, 2754 source=self._parse_table(), 2755 ) 2756 2757 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2758 comments = [] 2759 hint = self._parse_hint() 2760 overwrite = self._match(TokenType.OVERWRITE) 2761 ignore = self._match(TokenType.IGNORE) 2762 local = self._match_text_seq("LOCAL") 2763 alternative = None 2764 is_function = None 2765 2766 if self._match_text_seq("DIRECTORY"): 2767 this: t.Optional[exp.Expression] = self.expression( 2768 exp.Directory, 2769 this=self._parse_var_or_string(), 2770 local=local, 2771 row_format=self._parse_row_format(match_row=True), 2772 ) 2773 else: 2774 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2775 comments += ensure_list(self._prev_comments) 2776 return self._parse_multitable_inserts(comments) 2777 2778 if self._match(TokenType.OR): 2779 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2780 2781 self._match(TokenType.INTO) 2782 comments += ensure_list(self._prev_comments) 2783 self._match(TokenType.TABLE) 2784 is_function = self._match(TokenType.FUNCTION) 2785 2786 this = ( 2787 self._parse_table(schema=True, parse_partition=True) 2788 if not is_function 2789 else self._parse_function() 2790 ) 2791 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2792 this.set("alias", self._parse_table_alias()) 2793 2794 returning = self._parse_returning() 2795 2796 return self.expression( 2797 exp.Insert, 2798 comments=comments, 2799 hint=hint, 2800 is_function=is_function, 2801 this=this, 2802 stored=self._match_text_seq("STORED") and self._parse_stored(), 2803 by_name=self._match_text_seq("BY", "NAME"), 2804 exists=self._parse_exists(), 2805 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2806 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2807 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2808 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2809 conflict=self._parse_on_conflict(), 2810 returning=returning or self._parse_returning(), 2811 overwrite=overwrite, 2812 alternative=alternative, 2813 ignore=ignore, 2814 source=self._match(TokenType.TABLE) and self._parse_table(), 2815 ) 2816 2817 def _parse_kill(self) -> exp.Kill: 2818 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2819 2820 return self.expression( 2821 exp.Kill, 2822 this=self._parse_primary(), 2823 kind=kind, 2824 ) 2825 2826 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2827 conflict = self._match_text_seq("ON", "CONFLICT") 2828 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2829 2830 if not conflict and not duplicate: 2831 return None 2832 2833 conflict_keys = None 2834 constraint = None 2835 2836 if conflict: 2837 if self._match_text_seq("ON", "CONSTRAINT"): 2838 constraint = self._parse_id_var() 2839 elif self._match(TokenType.L_PAREN): 2840 conflict_keys = self._parse_csv(self._parse_id_var) 2841 self._match_r_paren() 2842 2843 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2844 if 
self._prev.token_type == TokenType.UPDATE: 2845 self._match(TokenType.SET) 2846 expressions = self._parse_csv(self._parse_equality) 2847 else: 2848 expressions = None 2849 2850 return self.expression( 2851 exp.OnConflict, 2852 duplicate=duplicate, 2853 expressions=expressions, 2854 action=action, 2855 conflict_keys=conflict_keys, 2856 constraint=constraint, 2857 where=self._parse_where(), 2858 ) 2859 2860 def _parse_returning(self) -> t.Optional[exp.Returning]: 2861 if not self._match(TokenType.RETURNING): 2862 return None 2863 return self.expression( 2864 exp.Returning, 2865 expressions=self._parse_csv(self._parse_expression), 2866 into=self._match(TokenType.INTO) and self._parse_table_part(), 2867 ) 2868 2869 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2870 if not self._match(TokenType.FORMAT): 2871 return None 2872 return self._parse_row_format() 2873 2874 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2875 index = self._index 2876 with_ = with_ or self._match_text_seq("WITH") 2877 2878 if not self._match(TokenType.SERDE_PROPERTIES): 2879 self._retreat(index) 2880 return None 2881 return self.expression( 2882 exp.SerdeProperties, 2883 **{ # type: ignore 2884 "expressions": self._parse_wrapped_properties(), 2885 "with": with_, 2886 }, 2887 ) 2888 2889 def _parse_row_format( 2890 self, match_row: bool = False 2891 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2892 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2893 return None 2894 2895 if self._match_text_seq("SERDE"): 2896 this = self._parse_string() 2897 2898 serde_properties = self._parse_serde_properties() 2899 2900 return self.expression( 2901 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2902 ) 2903 2904 self._match_text_seq("DELIMITED") 2905 2906 kwargs = {} 2907 2908 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2909 kwargs["fields"] = self._parse_string() 2910 if self._match_text_seq("ESCAPED", "BY"): 2911 kwargs["escaped"] = self._parse_string() 2912 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2913 kwargs["collection_items"] = self._parse_string() 2914 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2915 kwargs["map_keys"] = self._parse_string() 2916 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2917 kwargs["lines"] = self._parse_string() 2918 if self._match_text_seq("NULL", "DEFINED", "AS"): 2919 kwargs["null"] = self._parse_string() 2920 2921 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2922 2923 def _parse_load(self) -> exp.LoadData | exp.Command: 2924 if self._match_text_seq("DATA"): 2925 local = self._match_text_seq("LOCAL") 2926 self._match_text_seq("INPATH") 2927 inpath = self._parse_string() 2928 overwrite = self._match(TokenType.OVERWRITE) 2929 self._match_pair(TokenType.INTO, TokenType.TABLE) 2930 2931 return self.expression( 2932 exp.LoadData, 2933 this=self._parse_table(schema=True), 2934 local=local, 2935 overwrite=overwrite, 2936 inpath=inpath, 2937 partition=self._parse_partition(), 2938 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2939 serde=self._match_text_seq("SERDE") and self._parse_string(), 2940 ) 2941 return self._parse_as_command(self._prev) 2942 2943 def _parse_delete(self) -> exp.Delete: 2944 # This handles MySQL's "Multiple-Table Syntax" 2945 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2946 tables 
= None 2947 if not self._match(TokenType.FROM, advance=False): 2948 tables = self._parse_csv(self._parse_table) or None 2949 2950 returning = self._parse_returning() 2951 2952 return self.expression( 2953 exp.Delete, 2954 tables=tables, 2955 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2956 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2957 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2958 where=self._parse_where(), 2959 returning=returning or self._parse_returning(), 2960 limit=self._parse_limit(), 2961 ) 2962 2963 def _parse_update(self) -> exp.Update: 2964 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2965 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2966 returning = self._parse_returning() 2967 return self.expression( 2968 exp.Update, 2969 **{ # type: ignore 2970 "this": this, 2971 "expressions": expressions, 2972 "from": self._parse_from(joins=True), 2973 "where": self._parse_where(), 2974 "returning": returning or self._parse_returning(), 2975 "order": self._parse_order(), 2976 "limit": self._parse_limit(), 2977 }, 2978 ) 2979 2980 def _parse_use(self) -> exp.Use: 2981 return self.expression( 2982 exp.Use, 2983 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 2984 this=self._parse_table(schema=False), 2985 ) 2986 2987 def _parse_uncache(self) -> exp.Uncache: 2988 if not self._match(TokenType.TABLE): 2989 self.raise_error("Expecting TABLE after UNCACHE") 2990 2991 return self.expression( 2992 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2993 ) 2994 2995 def _parse_cache(self) -> exp.Cache: 2996 lazy = self._match_text_seq("LAZY") 2997 self._match(TokenType.TABLE) 2998 table = self._parse_table(schema=True) 2999 3000 options = [] 3001 if self._match_text_seq("OPTIONS"): 3002 self._match_l_paren() 3003 k = self._parse_string() 3004 self._match(TokenType.EQ) 3005 v = self._parse_string() 3006 options = [k, v] 3007 self._match_r_paren() 3008 3009 self._match(TokenType.ALIAS) 3010 return self.expression( 3011 exp.Cache, 3012 this=table, 3013 lazy=lazy, 3014 options=options, 3015 expression=self._parse_select(nested=True), 3016 ) 3017 3018 def _parse_partition(self) -> t.Optional[exp.Partition]: 3019 if not self._match_texts(self.PARTITION_KEYWORDS): 3020 return None 3021 3022 return self.expression( 3023 exp.Partition, 3024 subpartition=self._prev.text.upper() == "SUBPARTITION", 3025 expressions=self._parse_wrapped_csv(self._parse_assignment), 3026 ) 3027 3028 def _parse_value(self) -> t.Optional[exp.Tuple]: 3029 def _parse_value_expression() -> t.Optional[exp.Expression]: 3030 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3031 return exp.var(self._prev.text.upper()) 3032 return self._parse_expression() 3033 3034 if self._match(TokenType.L_PAREN): 3035 expressions = self._parse_csv(_parse_value_expression) 3036 self._match_r_paren() 3037 return self.expression(exp.Tuple, expressions=expressions) 3038 3039 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
3040 expression = self._parse_expression() 3041 if expression: 3042 return self.expression(exp.Tuple, expressions=[expression]) 3043 return None 3044 3045 def _parse_projections(self) -> t.List[exp.Expression]: 3046 return self._parse_expressions() 3047 3048 def _parse_select( 3049 self, 3050 nested: bool = False, 3051 table: bool = False, 3052 parse_subquery_alias: bool = True, 3053 parse_set_operation: bool = True, 3054 ) -> t.Optional[exp.Expression]: 3055 cte = self._parse_with() 3056 3057 if cte: 3058 this = self._parse_statement() 3059 3060 if not this: 3061 self.raise_error("Failed to parse any statement following CTE") 3062 return cte 3063 3064 if "with" in this.arg_types: 3065 this.set("with", cte) 3066 else: 3067 self.raise_error(f"{this.key} does not support CTE") 3068 this = cte 3069 3070 return this 3071 3072 # duckdb supports leading with FROM x 3073 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3074 3075 if self._match(TokenType.SELECT): 3076 comments = self._prev_comments 3077 3078 hint = self._parse_hint() 3079 3080 if self._next and not self._next.token_type == TokenType.DOT: 3081 all_ = self._match(TokenType.ALL) 3082 distinct = self._match_set(self.DISTINCT_TOKENS) 3083 else: 3084 all_, distinct = None, None 3085 3086 kind = ( 3087 self._match(TokenType.ALIAS) 3088 and self._match_texts(("STRUCT", "VALUE")) 3089 and self._prev.text.upper() 3090 ) 3091 3092 if distinct: 3093 distinct = self.expression( 3094 exp.Distinct, 3095 on=self._parse_value() if self._match(TokenType.ON) else None, 3096 ) 3097 3098 if all_ and distinct: 3099 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3100 3101 operation_modifiers = [] 3102 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3103 operation_modifiers.append(exp.var(self._prev.text.upper())) 3104 3105 limit = self._parse_limit(top=True) 3106 projections = self._parse_projections() 3107 3108 this = self.expression( 3109 exp.Select, 3110 kind=kind, 3111 hint=hint, 3112 distinct=distinct, 3113 expressions=projections, 3114 limit=limit, 3115 operation_modifiers=operation_modifiers or None, 3116 ) 3117 this.comments = comments 3118 3119 into = self._parse_into() 3120 if into: 3121 this.set("into", into) 3122 3123 if not from_: 3124 from_ = self._parse_from() 3125 3126 if from_: 3127 this.set("from", from_) 3128 3129 this = self._parse_query_modifiers(this) 3130 elif (table or nested) and self._match(TokenType.L_PAREN): 3131 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3132 this = self._parse_simplified_pivot( 3133 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3134 ) 3135 elif self._match(TokenType.FROM): 3136 from_ = self._parse_from(skip_from_token=True) 3137 # Support parentheses for duckdb FROM-first syntax 3138 select = self._parse_select() 3139 if select: 3140 select.set("from", from_) 3141 this = select 3142 else: 3143 this = exp.select("*").from_(t.cast(exp.From, from_)) 3144 else: 3145 this = ( 3146 self._parse_table() 3147 if table 3148 else self._parse_select(nested=True, parse_set_operation=False) 3149 ) 3150 3151 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3152 # in case a modifier (e.g. 
join) is following 3153 if table and isinstance(this, exp.Values) and this.alias: 3154 alias = this.args["alias"].pop() 3155 this = exp.Table(this=this, alias=alias) 3156 3157 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3158 3159 self._match_r_paren() 3160 3161 # We return early here so that the UNION isn't attached to the subquery by the 3162 # following call to _parse_set_operations, but instead becomes the parent node 3163 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3164 elif self._match(TokenType.VALUES, advance=False): 3165 this = self._parse_derived_table_values() 3166 elif from_: 3167 this = exp.select("*").from_(from_.this, copy=False) 3168 elif self._match(TokenType.SUMMARIZE): 3169 table = self._match(TokenType.TABLE) 3170 this = self._parse_select() or self._parse_string() or self._parse_table() 3171 return self.expression(exp.Summarize, this=this, table=table) 3172 elif self._match(TokenType.DESCRIBE): 3173 this = self._parse_describe() 3174 elif self._match_text_seq("STREAM"): 3175 this = self._parse_function() 3176 if this: 3177 this = self.expression(exp.Stream, this=this) 3178 else: 3179 self._retreat(self._index - 1) 3180 else: 3181 this = None 3182 3183 return self._parse_set_operations(this) if parse_set_operation else this 3184 3185 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3186 self._match_text_seq("SEARCH") 3187 3188 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3189 3190 if not kind: 3191 return None 3192 3193 self._match_text_seq("FIRST", "BY") 3194 3195 return self.expression( 3196 exp.RecursiveWithSearch, 3197 kind=kind, 3198 this=self._parse_id_var(), 3199 expression=self._match_text_seq("SET") and self._parse_id_var(), 3200 using=self._match_text_seq("USING") and self._parse_id_var(), 3201 ) 3202 3203 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3204 if not skip_with_token and not self._match(TokenType.WITH): 3205 return None 3206 3207 comments = self._prev_comments 3208 recursive = self._match(TokenType.RECURSIVE) 3209 3210 last_comments = None 3211 expressions = [] 3212 while True: 3213 cte = self._parse_cte() 3214 if isinstance(cte, exp.CTE): 3215 expressions.append(cte) 3216 if last_comments: 3217 cte.add_comments(last_comments) 3218 3219 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3220 break 3221 else: 3222 self._match(TokenType.WITH) 3223 3224 last_comments = self._prev_comments 3225 3226 return self.expression( 3227 exp.With, 3228 comments=comments, 3229 expressions=expressions, 3230 recursive=recursive, 3231 search=self._parse_recursive_with_search(), 3232 ) 3233 3234 def _parse_cte(self) -> t.Optional[exp.CTE]: 3235 index = self._index 3236 3237 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3238 if not alias or not alias.this: 3239 self.raise_error("Expected CTE to have alias") 3240 3241 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3242 self._retreat(index) 3243 return None 3244 3245 comments = self._prev_comments 3246 3247 if self._match_text_seq("NOT", "MATERIALIZED"): 3248 materialized = False 3249 elif self._match_text_seq("MATERIALIZED"): 3250 materialized = True 3251 else: 3252 materialized = None 3253 3254 cte = self.expression( 3255 exp.CTE, 3256 this=self._parse_wrapped(self._parse_statement), 3257 alias=alias, 3258 materialized=materialized, 3259 comments=comments, 3260 ) 3261 3262 if isinstance(cte.this, exp.Values): 3263 
cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3264 3265 return cte 3266 3267 def _parse_table_alias( 3268 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3269 ) -> t.Optional[exp.TableAlias]: 3270 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3271 # so this section tries to parse the clause version and if it fails, it treats the token 3272 # as an identifier (alias) 3273 if self._can_parse_limit_or_offset(): 3274 return None 3275 3276 any_token = self._match(TokenType.ALIAS) 3277 alias = ( 3278 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3279 or self._parse_string_as_identifier() 3280 ) 3281 3282 index = self._index 3283 if self._match(TokenType.L_PAREN): 3284 columns = self._parse_csv(self._parse_function_parameter) 3285 self._match_r_paren() if columns else self._retreat(index) 3286 else: 3287 columns = None 3288 3289 if not alias and not columns: 3290 return None 3291 3292 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3293 3294 # We bubble up comments from the Identifier to the TableAlias 3295 if isinstance(alias, exp.Identifier): 3296 table_alias.add_comments(alias.pop_comments()) 3297 3298 return table_alias 3299 3300 def _parse_subquery( 3301 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3302 ) -> t.Optional[exp.Subquery]: 3303 if not this: 3304 return None 3305 3306 return self.expression( 3307 exp.Subquery, 3308 this=this, 3309 pivots=self._parse_pivots(), 3310 alias=self._parse_table_alias() if parse_alias else None, 3311 sample=self._parse_table_sample(), 3312 ) 3313 3314 def _implicit_unnests_to_explicit(self, this: E) -> E: 3315 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3316 3317 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3318 for i, join in enumerate(this.args.get("joins") or []): 3319 table = join.this 3320 normalized_table = table.copy() 3321 normalized_table.meta["maybe_column"] = True 3322 normalized_table = _norm(normalized_table, dialect=self.dialect) 3323 3324 if isinstance(table, exp.Table) and not join.args.get("on"): 3325 if normalized_table.parts[0].name in refs: 3326 table_as_column = table.to_column() 3327 unnest = exp.Unnest(expressions=[table_as_column]) 3328 3329 # Table.to_column creates a parent Alias node that we want to convert to 3330 # a TableAlias and attach to the Unnest, so it matches the parser's output 3331 if isinstance(table.args.get("alias"), exp.TableAlias): 3332 table_as_column.replace(table_as_column.this) 3333 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3334 3335 table.replace(unnest) 3336 3337 refs.add(normalized_table.alias_or_name) 3338 3339 return this 3340 3341 def _parse_query_modifiers( 3342 self, this: t.Optional[exp.Expression] 3343 ) -> t.Optional[exp.Expression]: 3344 if isinstance(this, (exp.Query, exp.Table)): 3345 for join in self._parse_joins(): 3346 this.append("joins", join) 3347 for lateral in iter(self._parse_lateral, None): 3348 this.append("laterals", lateral) 3349 3350 while True: 3351 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3352 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3353 key, expression = parser(self) 3354 3355 if expression: 3356 this.set(key, expression) 3357 if key == "limit": 3358 offset = expression.args.pop("offset", None) 3359 3360 if offset: 3361 offset = 
exp.Offset(expression=offset) 3362 this.set("offset", offset) 3363 3364 limit_by_expressions = expression.expressions 3365 expression.set("expressions", None) 3366 offset.set("expressions", limit_by_expressions) 3367 continue 3368 break 3369 3370 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3371 this = self._implicit_unnests_to_explicit(this) 3372 3373 return this 3374 3375 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3376 start = self._curr 3377 while self._curr: 3378 self._advance() 3379 3380 end = self._tokens[self._index - 1] 3381 return exp.Hint(expressions=[self._find_sql(start, end)]) 3382 3383 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3384 return self._parse_function_call() 3385 3386 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3387 start_index = self._index 3388 should_fallback_to_string = False 3389 3390 hints = [] 3391 try: 3392 for hint in iter( 3393 lambda: self._parse_csv( 3394 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3395 ), 3396 [], 3397 ): 3398 hints.extend(hint) 3399 except ParseError: 3400 should_fallback_to_string = True 3401 3402 if should_fallback_to_string or self._curr: 3403 self._retreat(start_index) 3404 return self._parse_hint_fallback_to_string() 3405 3406 return self.expression(exp.Hint, expressions=hints) 3407 3408 def _parse_hint(self) -> t.Optional[exp.Hint]: 3409 if self._match(TokenType.HINT) and self._prev_comments: 3410 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3411 3412 return None 3413 3414 def _parse_into(self) -> t.Optional[exp.Into]: 3415 if not self._match(TokenType.INTO): 3416 return None 3417 3418 temp = self._match(TokenType.TEMPORARY) 3419 unlogged = self._match_text_seq("UNLOGGED") 3420 self._match(TokenType.TABLE) 3421 3422 return self.expression( 3423 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3424 ) 3425 3426 def _parse_from( 3427 self, joins: bool = False, skip_from_token: bool = False 3428 ) -> t.Optional[exp.From]: 3429 if not skip_from_token and not self._match(TokenType.FROM): 3430 return None 3431 3432 return self.expression( 3433 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3434 ) 3435 3436 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3437 return self.expression( 3438 exp.MatchRecognizeMeasure, 3439 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3440 this=self._parse_expression(), 3441 ) 3442 3443 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3444 if not self._match(TokenType.MATCH_RECOGNIZE): 3445 return None 3446 3447 self._match_l_paren() 3448 3449 partition = self._parse_partition_by() 3450 order = self._parse_order() 3451 3452 measures = ( 3453 self._parse_csv(self._parse_match_recognize_measure) 3454 if self._match_text_seq("MEASURES") 3455 else None 3456 ) 3457 3458 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3459 rows = exp.var("ONE ROW PER MATCH") 3460 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3461 text = "ALL ROWS PER MATCH" 3462 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3463 text += " SHOW EMPTY MATCHES" 3464 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3465 text += " OMIT EMPTY MATCHES" 3466 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3467 text += " WITH UNMATCHED ROWS" 3468 rows = exp.var(text) 3469 else: 3470 rows = None 3471 3472 if self._match_text_seq("AFTER", 
"MATCH", "SKIP"): 3473 text = "AFTER MATCH SKIP" 3474 if self._match_text_seq("PAST", "LAST", "ROW"): 3475 text += " PAST LAST ROW" 3476 elif self._match_text_seq("TO", "NEXT", "ROW"): 3477 text += " TO NEXT ROW" 3478 elif self._match_text_seq("TO", "FIRST"): 3479 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3480 elif self._match_text_seq("TO", "LAST"): 3481 text += f" TO LAST {self._advance_any().text}" # type: ignore 3482 after = exp.var(text) 3483 else: 3484 after = None 3485 3486 if self._match_text_seq("PATTERN"): 3487 self._match_l_paren() 3488 3489 if not self._curr: 3490 self.raise_error("Expecting )", self._curr) 3491 3492 paren = 1 3493 start = self._curr 3494 3495 while self._curr and paren > 0: 3496 if self._curr.token_type == TokenType.L_PAREN: 3497 paren += 1 3498 if self._curr.token_type == TokenType.R_PAREN: 3499 paren -= 1 3500 3501 end = self._prev 3502 self._advance() 3503 3504 if paren > 0: 3505 self.raise_error("Expecting )", self._curr) 3506 3507 pattern = exp.var(self._find_sql(start, end)) 3508 else: 3509 pattern = None 3510 3511 define = ( 3512 self._parse_csv(self._parse_name_as_expression) 3513 if self._match_text_seq("DEFINE") 3514 else None 3515 ) 3516 3517 self._match_r_paren() 3518 3519 return self.expression( 3520 exp.MatchRecognize, 3521 partition_by=partition, 3522 order=order, 3523 measures=measures, 3524 rows=rows, 3525 after=after, 3526 pattern=pattern, 3527 define=define, 3528 alias=self._parse_table_alias(), 3529 ) 3530 3531 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3532 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3533 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3534 cross_apply = False 3535 3536 if cross_apply is not None: 3537 this = self._parse_select(table=True) 3538 view = None 3539 outer = None 3540 elif self._match(TokenType.LATERAL): 3541 this = self._parse_select(table=True) 3542 view = self._match(TokenType.VIEW) 3543 outer = self._match(TokenType.OUTER) 3544 else: 3545 return None 3546 3547 if not this: 3548 this = ( 3549 self._parse_unnest() 3550 or self._parse_function() 3551 or self._parse_id_var(any_token=False) 3552 ) 3553 3554 while self._match(TokenType.DOT): 3555 this = exp.Dot( 3556 this=this, 3557 expression=self._parse_function() or self._parse_id_var(any_token=False), 3558 ) 3559 3560 if view: 3561 table = self._parse_id_var(any_token=False) 3562 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3563 table_alias: t.Optional[exp.TableAlias] = self.expression( 3564 exp.TableAlias, this=table, columns=columns 3565 ) 3566 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3567 # We move the alias from the lateral's child node to the lateral itself 3568 table_alias = this.args["alias"].pop() 3569 else: 3570 table_alias = self._parse_table_alias() 3571 3572 return self.expression( 3573 exp.Lateral, 3574 this=this, 3575 view=view, 3576 outer=outer, 3577 alias=table_alias, 3578 cross_apply=cross_apply, 3579 ) 3580 3581 def _parse_join_parts( 3582 self, 3583 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3584 return ( 3585 self._match_set(self.JOIN_METHODS) and self._prev, 3586 self._match_set(self.JOIN_SIDES) and self._prev, 3587 self._match_set(self.JOIN_KINDS) and self._prev, 3588 ) 3589 3590 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3591 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3592 this = self._parse_column() 3593 if isinstance(this, 
exp.Column): 3594 return this.this 3595 return this 3596 3597 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3598 3599 def _parse_join( 3600 self, skip_join_token: bool = False, parse_bracket: bool = False 3601 ) -> t.Optional[exp.Join]: 3602 if self._match(TokenType.COMMA): 3603 return self.expression(exp.Join, this=self._parse_table()) 3604 3605 index = self._index 3606 method, side, kind = self._parse_join_parts() 3607 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3608 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3609 3610 if not skip_join_token and not join: 3611 self._retreat(index) 3612 kind = None 3613 method = None 3614 side = None 3615 3616 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3617 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3618 3619 if not skip_join_token and not join and not outer_apply and not cross_apply: 3620 return None 3621 3622 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3623 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3624 kwargs["expressions"] = self._parse_csv( 3625 lambda: self._parse_table(parse_bracket=parse_bracket) 3626 ) 3627 3628 if method: 3629 kwargs["method"] = method.text 3630 if side: 3631 kwargs["side"] = side.text 3632 if kind: 3633 kwargs["kind"] = kind.text 3634 if hint: 3635 kwargs["hint"] = hint 3636 3637 if self._match(TokenType.MATCH_CONDITION): 3638 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3639 3640 if self._match(TokenType.ON): 3641 kwargs["on"] = self._parse_assignment() 3642 elif self._match(TokenType.USING): 3643 kwargs["using"] = self._parse_using_identifiers() 3644 elif ( 3645 not (outer_apply or cross_apply) 3646 and not isinstance(kwargs["this"], exp.Unnest) 3647 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3648 ): 3649 index = self._index 3650 joins: t.Optional[list] = list(self._parse_joins()) 3651 3652 if joins and self._match(TokenType.ON): 3653 kwargs["on"] = self._parse_assignment() 3654 elif joins and self._match(TokenType.USING): 3655 kwargs["using"] = self._parse_using_identifiers() 3656 else: 3657 joins = None 3658 self._retreat(index) 3659 3660 kwargs["this"].set("joins", joins if joins else None) 3661 3662 comments = [c for token in (method, side, kind) if token for c in token.comments] 3663 return self.expression(exp.Join, comments=comments, **kwargs) 3664 3665 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3666 this = self._parse_assignment() 3667 3668 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3669 return this 3670 3671 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3672 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3673 3674 return this 3675 3676 def _parse_index_params(self) -> exp.IndexParameters: 3677 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3678 3679 if self._match(TokenType.L_PAREN, advance=False): 3680 columns = self._parse_wrapped_csv(self._parse_with_operator) 3681 else: 3682 columns = None 3683 3684 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3685 partition_by = self._parse_partition_by() 3686 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3687 tablespace = ( 3688 self._parse_var(any_token=True) 3689 if 
self._match_text_seq("USING", "INDEX", "TABLESPACE") 3690 else None 3691 ) 3692 where = self._parse_where() 3693 3694 on = self._parse_field() if self._match(TokenType.ON) else None 3695 3696 return self.expression( 3697 exp.IndexParameters, 3698 using=using, 3699 columns=columns, 3700 include=include, 3701 partition_by=partition_by, 3702 where=where, 3703 with_storage=with_storage, 3704 tablespace=tablespace, 3705 on=on, 3706 ) 3707 3708 def _parse_index( 3709 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3710 ) -> t.Optional[exp.Index]: 3711 if index or anonymous: 3712 unique = None 3713 primary = None 3714 amp = None 3715 3716 self._match(TokenType.ON) 3717 self._match(TokenType.TABLE) # hive 3718 table = self._parse_table_parts(schema=True) 3719 else: 3720 unique = self._match(TokenType.UNIQUE) 3721 primary = self._match_text_seq("PRIMARY") 3722 amp = self._match_text_seq("AMP") 3723 3724 if not self._match(TokenType.INDEX): 3725 return None 3726 3727 index = self._parse_id_var() 3728 table = None 3729 3730 params = self._parse_index_params() 3731 3732 return self.expression( 3733 exp.Index, 3734 this=index, 3735 table=table, 3736 unique=unique, 3737 primary=primary, 3738 amp=amp, 3739 params=params, 3740 ) 3741 3742 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3743 hints: t.List[exp.Expression] = [] 3744 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3745 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3746 hints.append( 3747 self.expression( 3748 exp.WithTableHint, 3749 expressions=self._parse_csv( 3750 lambda: self._parse_function() or self._parse_var(any_token=True) 3751 ), 3752 ) 3753 ) 3754 self._match_r_paren() 3755 else: 3756 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3757 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3758 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3759 3760 self._match_set((TokenType.INDEX, TokenType.KEY)) 3761 if self._match(TokenType.FOR): 3762 hint.set("target", self._advance_any() and self._prev.text.upper()) 3763 3764 hint.set("expressions", self._parse_wrapped_id_vars()) 3765 hints.append(hint) 3766 3767 return hints or None 3768 3769 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3770 return ( 3771 (not schema and self._parse_function(optional_parens=False)) 3772 or self._parse_id_var(any_token=False) 3773 or self._parse_string_as_identifier() 3774 or self._parse_placeholder() 3775 ) 3776 3777 def _parse_table_parts( 3778 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3779 ) -> exp.Table: 3780 catalog = None 3781 db = None 3782 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3783 3784 while self._match(TokenType.DOT): 3785 if catalog: 3786 # This allows nesting the table in arbitrarily many dot expressions if needed 3787 table = self.expression( 3788 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3789 ) 3790 else: 3791 catalog = db 3792 db = table 3793 # "" used for tsql FROM a..b case 3794 table = self._parse_table_part(schema=schema) or "" 3795 3796 if ( 3797 wildcard 3798 and self._is_connected() 3799 and (isinstance(table, exp.Identifier) or not table) 3800 and self._match(TokenType.STAR) 3801 ): 3802 if isinstance(table, exp.Identifier): 3803 table.args["this"] += "*" 3804 else: 3805 table = exp.Identifier(this="*") 3806 3807 # We bubble up comments from the Identifier to the Table 
3808 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3809 3810 if is_db_reference: 3811 catalog = db 3812 db = table 3813 table = None 3814 3815 if not table and not is_db_reference: 3816 self.raise_error(f"Expected table name but got {self._curr}") 3817 if not db and is_db_reference: 3818 self.raise_error(f"Expected database name but got {self._curr}") 3819 3820 table = self.expression( 3821 exp.Table, 3822 comments=comments, 3823 this=table, 3824 db=db, 3825 catalog=catalog, 3826 ) 3827 3828 changes = self._parse_changes() 3829 if changes: 3830 table.set("changes", changes) 3831 3832 at_before = self._parse_historical_data() 3833 if at_before: 3834 table.set("when", at_before) 3835 3836 pivots = self._parse_pivots() 3837 if pivots: 3838 table.set("pivots", pivots) 3839 3840 return table 3841 3842 def _parse_table( 3843 self, 3844 schema: bool = False, 3845 joins: bool = False, 3846 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3847 parse_bracket: bool = False, 3848 is_db_reference: bool = False, 3849 parse_partition: bool = False, 3850 ) -> t.Optional[exp.Expression]: 3851 lateral = self._parse_lateral() 3852 if lateral: 3853 return lateral 3854 3855 unnest = self._parse_unnest() 3856 if unnest: 3857 return unnest 3858 3859 values = self._parse_derived_table_values() 3860 if values: 3861 return values 3862 3863 subquery = self._parse_select(table=True) 3864 if subquery: 3865 if not subquery.args.get("pivots"): 3866 subquery.set("pivots", self._parse_pivots()) 3867 return subquery 3868 3869 bracket = parse_bracket and self._parse_bracket(None) 3870 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3871 3872 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3873 self._parse_table 3874 ) 3875 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3876 3877 only = self._match(TokenType.ONLY) 3878 3879 this = t.cast( 3880 exp.Expression, 3881 bracket 3882 or rows_from 3883 or self._parse_bracket( 3884 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3885 ), 3886 ) 3887 3888 if only: 3889 this.set("only", only) 3890 3891 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3892 self._match_text_seq("*") 3893 3894 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3895 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3896 this.set("partition", self._parse_partition()) 3897 3898 if schema: 3899 return self._parse_schema(this=this) 3900 3901 version = self._parse_version() 3902 3903 if version: 3904 this.set("version", version) 3905 3906 if self.dialect.ALIAS_POST_TABLESAMPLE: 3907 this.set("sample", self._parse_table_sample()) 3908 3909 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3910 if alias: 3911 this.set("alias", alias) 3912 3913 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3914 return self.expression( 3915 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3916 ) 3917 3918 this.set("hints", self._parse_table_hints()) 3919 3920 if not this.args.get("pivots"): 3921 this.set("pivots", self._parse_pivots()) 3922 3923 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3924 this.set("sample", self._parse_table_sample()) 3925 3926 if joins: 3927 for join in self._parse_joins(): 3928 this.append("joins", join) 3929 3930 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3931 
this.set("ordinality", True) 3932 this.set("alias", self._parse_table_alias()) 3933 3934 return this 3935 3936 def _parse_version(self) -> t.Optional[exp.Version]: 3937 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3938 this = "TIMESTAMP" 3939 elif self._match(TokenType.VERSION_SNAPSHOT): 3940 this = "VERSION" 3941 else: 3942 return None 3943 3944 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3945 kind = self._prev.text.upper() 3946 start = self._parse_bitwise() 3947 self._match_texts(("TO", "AND")) 3948 end = self._parse_bitwise() 3949 expression: t.Optional[exp.Expression] = self.expression( 3950 exp.Tuple, expressions=[start, end] 3951 ) 3952 elif self._match_text_seq("CONTAINED", "IN"): 3953 kind = "CONTAINED IN" 3954 expression = self.expression( 3955 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3956 ) 3957 elif self._match(TokenType.ALL): 3958 kind = "ALL" 3959 expression = None 3960 else: 3961 self._match_text_seq("AS", "OF") 3962 kind = "AS OF" 3963 expression = self._parse_type() 3964 3965 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3966 3967 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3968 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3969 index = self._index 3970 historical_data = None 3971 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3972 this = self._prev.text.upper() 3973 kind = ( 3974 self._match(TokenType.L_PAREN) 3975 and self._match_texts(self.HISTORICAL_DATA_KIND) 3976 and self._prev.text.upper() 3977 ) 3978 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3979 3980 if expression: 3981 self._match_r_paren() 3982 historical_data = self.expression( 3983 exp.HistoricalData, this=this, kind=kind, expression=expression 3984 ) 3985 else: 3986 self._retreat(index) 3987 3988 return historical_data 3989 3990 def _parse_changes(self) -> t.Optional[exp.Changes]: 3991 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3992 return None 3993 3994 information = self._parse_var(any_token=True) 3995 self._match_r_paren() 3996 3997 return self.expression( 3998 exp.Changes, 3999 information=information, 4000 at_before=self._parse_historical_data(), 4001 end=self._parse_historical_data(), 4002 ) 4003 4004 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4005 if not self._match(TokenType.UNNEST): 4006 return None 4007 4008 expressions = self._parse_wrapped_csv(self._parse_equality) 4009 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4010 4011 alias = self._parse_table_alias() if with_alias else None 4012 4013 if alias: 4014 if self.dialect.UNNEST_COLUMN_ONLY: 4015 if alias.args.get("columns"): 4016 self.raise_error("Unexpected extra column alias in unnest.") 4017 4018 alias.set("columns", [alias.this]) 4019 alias.set("this", None) 4020 4021 columns = alias.args.get("columns") or [] 4022 if offset and len(expressions) < len(columns): 4023 offset = columns.pop() 4024 4025 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4026 self._match(TokenType.ALIAS) 4027 offset = self._parse_id_var( 4028 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4029 ) or exp.to_identifier("offset") 4030 4031 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4032 4033 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4034 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4035 if not is_derived and not ( 4036 # ClickHouse's 
`FORMAT Values` is equivalent to `VALUES` 4037 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4038 ): 4039 return None 4040 4041 expressions = self._parse_csv(self._parse_value) 4042 alias = self._parse_table_alias() 4043 4044 if is_derived: 4045 self._match_r_paren() 4046 4047 return self.expression( 4048 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4049 ) 4050 4051 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4052 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4053 as_modifier and self._match_text_seq("USING", "SAMPLE") 4054 ): 4055 return None 4056 4057 bucket_numerator = None 4058 bucket_denominator = None 4059 bucket_field = None 4060 percent = None 4061 size = None 4062 seed = None 4063 4064 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4065 matched_l_paren = self._match(TokenType.L_PAREN) 4066 4067 if self.TABLESAMPLE_CSV: 4068 num = None 4069 expressions = self._parse_csv(self._parse_primary) 4070 else: 4071 expressions = None 4072 num = ( 4073 self._parse_factor() 4074 if self._match(TokenType.NUMBER, advance=False) 4075 else self._parse_primary() or self._parse_placeholder() 4076 ) 4077 4078 if self._match_text_seq("BUCKET"): 4079 bucket_numerator = self._parse_number() 4080 self._match_text_seq("OUT", "OF") 4081 bucket_denominator = bucket_denominator = self._parse_number() 4082 self._match(TokenType.ON) 4083 bucket_field = self._parse_field() 4084 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4085 percent = num 4086 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4087 size = num 4088 else: 4089 percent = num 4090 4091 if matched_l_paren: 4092 self._match_r_paren() 4093 4094 if self._match(TokenType.L_PAREN): 4095 method = self._parse_var(upper=True) 4096 seed = self._match(TokenType.COMMA) and self._parse_number() 4097 self._match_r_paren() 4098 elif self._match_texts(("SEED", "REPEATABLE")): 4099 seed = self._parse_wrapped(self._parse_number) 4100 4101 if not method and self.DEFAULT_SAMPLING_METHOD: 4102 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4103 4104 return self.expression( 4105 exp.TableSample, 4106 expressions=expressions, 4107 method=method, 4108 bucket_numerator=bucket_numerator, 4109 bucket_denominator=bucket_denominator, 4110 bucket_field=bucket_field, 4111 percent=percent, 4112 size=size, 4113 seed=seed, 4114 ) 4115 4116 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4117 return list(iter(self._parse_pivot, None)) or None 4118 4119 def _parse_joins(self) -> t.Iterator[exp.Join]: 4120 return iter(self._parse_join, None) 4121 4122 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4123 if not self._match(TokenType.INTO): 4124 return None 4125 4126 return self.expression( 4127 exp.UnpivotColumns, 4128 this=self._match_text_seq("NAME") and self._parse_column(), 4129 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4130 ) 4131 4132 # https://duckdb.org/docs/sql/statements/pivot 4133 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4134 def _parse_on() -> t.Optional[exp.Expression]: 4135 this = self._parse_bitwise() 4136 4137 if self._match(TokenType.IN): 4138 # PIVOT ... ON col IN (row_val1, row_val2) 4139 return self._parse_in(this) 4140 if self._match(TokenType.ALIAS, advance=False): 4141 # UNPIVOT ... 
ON (col1, col2, col3) AS row_val 4142 return self._parse_alias(this) 4143 4144 return this 4145 4146 this = self._parse_table() 4147 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4148 into = self._parse_unpivot_columns() 4149 using = self._match(TokenType.USING) and self._parse_csv( 4150 lambda: self._parse_alias(self._parse_function()) 4151 ) 4152 group = self._parse_group() 4153 4154 return self.expression( 4155 exp.Pivot, 4156 this=this, 4157 expressions=expressions, 4158 using=using, 4159 group=group, 4160 unpivot=is_unpivot, 4161 into=into, 4162 ) 4163 4164 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4165 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4166 this = self._parse_select_or_expression() 4167 4168 self._match(TokenType.ALIAS) 4169 alias = self._parse_bitwise() 4170 if alias: 4171 if isinstance(alias, exp.Column) and not alias.db: 4172 alias = alias.this 4173 return self.expression(exp.PivotAlias, this=this, alias=alias) 4174 4175 return this 4176 4177 value = self._parse_column() 4178 4179 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4180 self.raise_error("Expecting IN (") 4181 4182 if self._match(TokenType.ANY): 4183 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4184 else: 4185 exprs = self._parse_csv(_parse_aliased_expression) 4186 4187 self._match_r_paren() 4188 return self.expression(exp.In, this=value, expressions=exprs) 4189 4190 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4191 index = self._index 4192 include_nulls = None 4193 4194 if self._match(TokenType.PIVOT): 4195 unpivot = False 4196 elif self._match(TokenType.UNPIVOT): 4197 unpivot = True 4198 4199 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4200 if self._match_text_seq("INCLUDE", "NULLS"): 4201 include_nulls = True 4202 elif self._match_text_seq("EXCLUDE", "NULLS"): 4203 include_nulls = False 4204 else: 4205 return None 4206 4207 expressions = [] 4208 4209 if not self._match(TokenType.L_PAREN): 4210 self._retreat(index) 4211 return None 4212 4213 if unpivot: 4214 expressions = self._parse_csv(self._parse_column) 4215 else: 4216 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4217 4218 if not expressions: 4219 self.raise_error("Failed to parse PIVOT's aggregation list") 4220 4221 if not self._match(TokenType.FOR): 4222 self.raise_error("Expecting FOR") 4223 4224 field = self._parse_pivot_in() 4225 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4226 self._parse_bitwise 4227 ) 4228 4229 self._match_r_paren() 4230 4231 pivot = self.expression( 4232 exp.Pivot, 4233 expressions=expressions, 4234 field=field, 4235 unpivot=unpivot, 4236 include_nulls=include_nulls, 4237 default_on_null=default_on_null, 4238 ) 4239 4240 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4241 pivot.set("alias", self._parse_table_alias()) 4242 4243 if not unpivot: 4244 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4245 4246 columns: t.List[exp.Expression] = [] 4247 pivot_field_expressions = pivot.args["field"].expressions 4248 4249 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
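            # A sketch of how the two flags below combine names (behavior varies by dialect):
            # with one aggregation aliased `s` and pivoted values 'a' and 'b', PREFIXED_PIVOT_COLUMNS
            # yields output columns `s_a` and `s_b`, while the default order yields `a_s` and `b_s`;
            # an unaliased aggregation contributes an empty name, so the field name stands alone.
            # A hypothetical way to inspect the result:
            #     >>> import sqlglot
            #     >>> pivot = sqlglot.parse_one(
            #     ...     "SELECT * FROM t PIVOT(SUM(v) AS s FOR k IN ('a', 'b'))", read="spark"
            #     ... ).find(sqlglot.exp.Pivot)
            #     >>> [col.name for col in pivot.args.get("columns", [])]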
4250 if not isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4251 for fld in pivot_field_expressions: 4252 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4253 for name in names: 4254 if self.PREFIXED_PIVOT_COLUMNS: 4255 name = f"{name}_{field_name}" if name else field_name 4256 else: 4257 name = f"{field_name}_{name}" if name else field_name 4258 4259 columns.append(exp.to_identifier(name)) 4260 4261 pivot.set("columns", columns) 4262 4263 return pivot 4264 4265 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4266 return [agg.alias for agg in aggregations] 4267 4268 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4269 if not skip_where_token and not self._match(TokenType.PREWHERE): 4270 return None 4271 4272 return self.expression( 4273 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4274 ) 4275 4276 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4277 if not skip_where_token and not self._match(TokenType.WHERE): 4278 return None 4279 4280 return self.expression( 4281 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4282 ) 4283 4284 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4285 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4286 return None 4287 4288 elements: t.Dict[str, t.Any] = defaultdict(list) 4289 4290 if self._match(TokenType.ALL): 4291 elements["all"] = True 4292 elif self._match(TokenType.DISTINCT): 4293 elements["all"] = False 4294 4295 while True: 4296 index = self._index 4297 4298 elements["expressions"].extend( 4299 self._parse_csv( 4300 lambda: None 4301 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4302 else self._parse_assignment() 4303 ) 4304 ) 4305 4306 before_with_index = self._index 4307 with_prefix = self._match(TokenType.WITH) 4308 4309 if self._match(TokenType.ROLLUP): 4310 elements["rollup"].append( 4311 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4312 ) 4313 elif self._match(TokenType.CUBE): 4314 elements["cube"].append( 4315 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4316 ) 4317 elif self._match(TokenType.GROUPING_SETS): 4318 elements["grouping_sets"].append( 4319 self.expression( 4320 exp.GroupingSets, 4321 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4322 ) 4323 ) 4324 elif self._match_text_seq("TOTALS"): 4325 elements["totals"] = True # type: ignore 4326 4327 if before_with_index <= self._index <= before_with_index + 1: 4328 self._retreat(before_with_index) 4329 break 4330 4331 if index == self._index: 4332 break 4333 4334 return self.expression(exp.Group, **elements) # type: ignore 4335 4336 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4337 return self.expression( 4338 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4339 ) 4340 4341 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4342 if self._match(TokenType.L_PAREN): 4343 grouping_set = self._parse_csv(self._parse_column) 4344 self._match_r_paren() 4345 return self.expression(exp.Tuple, expressions=grouping_set) 4346 4347 return self._parse_column() 4348 4349 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4350 if not skip_having_token and not self._match(TokenType.HAVING): 4351 return None 4352 return self.expression(exp.Having, 
this=self._parse_assignment()) 4353 4354 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4355 if not self._match(TokenType.QUALIFY): 4356 return None 4357 return self.expression(exp.Qualify, this=self._parse_assignment()) 4358 4359 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4360 if skip_start_token: 4361 start = None 4362 elif self._match(TokenType.START_WITH): 4363 start = self._parse_assignment() 4364 else: 4365 return None 4366 4367 self._match(TokenType.CONNECT_BY) 4368 nocycle = self._match_text_seq("NOCYCLE") 4369 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4370 exp.Prior, this=self._parse_bitwise() 4371 ) 4372 connect = self._parse_assignment() 4373 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4374 4375 if not start and self._match(TokenType.START_WITH): 4376 start = self._parse_assignment() 4377 4378 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4379 4380 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4381 this = self._parse_id_var(any_token=True) 4382 if self._match(TokenType.ALIAS): 4383 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4384 return this 4385 4386 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4387 if self._match_text_seq("INTERPOLATE"): 4388 return self._parse_wrapped_csv(self._parse_name_as_expression) 4389 return None 4390 4391 def _parse_order( 4392 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4393 ) -> t.Optional[exp.Expression]: 4394 siblings = None 4395 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4396 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4397 return this 4398 4399 siblings = True 4400 4401 return self.expression( 4402 exp.Order, 4403 this=this, 4404 expressions=self._parse_csv(self._parse_ordered), 4405 siblings=siblings, 4406 ) 4407 4408 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4409 if not self._match(token): 4410 return None 4411 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4412 4413 def _parse_ordered( 4414 self, parse_method: t.Optional[t.Callable] = None 4415 ) -> t.Optional[exp.Ordered]: 4416 this = parse_method() if parse_method else self._parse_assignment() 4417 if not this: 4418 return None 4419 4420 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4421 this = exp.var("ALL") 4422 4423 asc = self._match(TokenType.ASC) 4424 desc = self._match(TokenType.DESC) or (asc and False) 4425 4426 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4427 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4428 4429 nulls_first = is_nulls_first or False 4430 explicitly_null_ordered = is_nulls_first or is_nulls_last 4431 4432 if ( 4433 not explicitly_null_ordered 4434 and ( 4435 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4436 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4437 ) 4438 and self.dialect.NULL_ORDERING != "nulls_are_last" 4439 ): 4440 nulls_first = True 4441 4442 if self._match_text_seq("WITH", "FILL"): 4443 with_fill = self.expression( 4444 exp.WithFill, 4445 **{ # type: ignore 4446 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4447 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4448 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4449 "interpolate": self._parse_interpolate(), 4450 }, 4451 ) 4452 else: 4453 with_fill = None 4454 
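        # Doctest-style sketch of the normalization above, using the default dialect; explicit
        # NULLS LAST is recorded as-is, while implicit null ordering follows NULL_ORDERING:
        #     >>> import sqlglot
        #     >>> ordered = sqlglot.parse_one(
        #     ...     "SELECT x FROM t ORDER BY x DESC NULLS LAST"
        #     ... ).find(sqlglot.exp.Ordered)
        #     >>> ordered.args["desc"], ordered.args["nulls_first"]
        #     (True, False)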
4455 return self.expression( 4456 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4457 ) 4458 4459 def _parse_limit_options(self) -> exp.LimitOptions: 4460 percent = self._match(TokenType.PERCENT) 4461 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4462 self._match_text_seq("ONLY") 4463 with_ties = self._match_text_seq("WITH", "TIES") 4464 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4465 4466 def _parse_limit( 4467 self, 4468 this: t.Optional[exp.Expression] = None, 4469 top: bool = False, 4470 skip_limit_token: bool = False, 4471 ) -> t.Optional[exp.Expression]: 4472 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4473 comments = self._prev_comments 4474 if top: 4475 limit_paren = self._match(TokenType.L_PAREN) 4476 expression = self._parse_term() if limit_paren else self._parse_number() 4477 4478 if limit_paren: 4479 self._match_r_paren() 4480 4481 limit_options = self._parse_limit_options() 4482 else: 4483 limit_options = None 4484 expression = self._parse_term() 4485 4486 if self._match(TokenType.COMMA): 4487 offset = expression 4488 expression = self._parse_term() 4489 else: 4490 offset = None 4491 4492 limit_exp = self.expression( 4493 exp.Limit, 4494 this=this, 4495 expression=expression, 4496 offset=offset, 4497 comments=comments, 4498 limit_options=limit_options, 4499 expressions=self._parse_limit_by(), 4500 ) 4501 4502 return limit_exp 4503 4504 if self._match(TokenType.FETCH): 4505 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4506 direction = self._prev.text.upper() if direction else "FIRST" 4507 4508 count = self._parse_field(tokens=self.FETCH_TOKENS) 4509 4510 return self.expression( 4511 exp.Fetch, 4512 direction=direction, 4513 count=count, 4514 limit_options=self._parse_limit_options(), 4515 ) 4516 4517 return this 4518 4519 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4520 if not self._match(TokenType.OFFSET): 4521 return this 4522 4523 count = self._parse_term() 4524 self._match_set((TokenType.ROW, TokenType.ROWS)) 4525 4526 return self.expression( 4527 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4528 ) 4529 4530 def _can_parse_limit_or_offset(self) -> bool: 4531 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4532 return False 4533 4534 index = self._index 4535 result = bool( 4536 self._try_parse(self._parse_limit, retreat=True) 4537 or self._try_parse(self._parse_offset, retreat=True) 4538 ) 4539 self._retreat(index) 4540 return result 4541 4542 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4543 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4544 4545 def _parse_locks(self) -> t.List[exp.Lock]: 4546 locks = [] 4547 while True: 4548 if self._match_text_seq("FOR", "UPDATE"): 4549 update = True 4550 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4551 "LOCK", "IN", "SHARE", "MODE" 4552 ): 4553 update = False 4554 else: 4555 break 4556 4557 expressions = None 4558 if self._match_text_seq("OF"): 4559 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4560 4561 wait: t.Optional[bool | exp.Expression] = None 4562 if self._match_text_seq("NOWAIT"): 4563 wait = True 4564 elif self._match_text_seq("WAIT"): 4565 wait = self._parse_primary() 4566 elif self._match_text_seq("SKIP", "LOCKED"): 4567 wait = False 4568 4569 locks.append( 4570 self.expression(exp.Lock, 
update=update, expressions=expressions, wait=wait) 4571 ) 4572 4573 return locks 4574 4575 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4576 while this and self._match_set(self.SET_OPERATIONS): 4577 token_type = self._prev.token_type 4578 4579 if token_type == TokenType.UNION: 4580 operation: t.Type[exp.SetOperation] = exp.Union 4581 elif token_type == TokenType.EXCEPT: 4582 operation = exp.Except 4583 else: 4584 operation = exp.Intersect 4585 4586 comments = self._prev.comments 4587 4588 if self._match(TokenType.DISTINCT): 4589 distinct: t.Optional[bool] = True 4590 elif self._match(TokenType.ALL): 4591 distinct = False 4592 else: 4593 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4594 if distinct is None: 4595 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4596 4597 by_name = self._match_text_seq("BY", "NAME") 4598 expression = self._parse_select(nested=True, parse_set_operation=False) 4599 4600 this = self.expression( 4601 operation, 4602 comments=comments, 4603 this=this, 4604 distinct=distinct, 4605 by_name=by_name, 4606 expression=expression, 4607 ) 4608 4609 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4610 expression = this.expression 4611 4612 if expression: 4613 for arg in self.SET_OP_MODIFIERS: 4614 expr = expression.args.get(arg) 4615 if expr: 4616 this.set(arg, expr.pop()) 4617 4618 return this 4619 4620 def _parse_expression(self) -> t.Optional[exp.Expression]: 4621 return self._parse_alias(self._parse_assignment()) 4622 4623 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4624 this = self._parse_disjunction() 4625 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4626 # This allows us to parse <non-identifier token> := <expr> 4627 this = exp.column( 4628 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4629 ) 4630 4631 while self._match_set(self.ASSIGNMENT): 4632 if isinstance(this, exp.Column) and len(this.parts) == 1: 4633 this = this.this 4634 4635 this = self.expression( 4636 self.ASSIGNMENT[self._prev.token_type], 4637 this=this, 4638 comments=self._prev_comments, 4639 expression=self._parse_assignment(), 4640 ) 4641 4642 return this 4643 4644 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4645 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4646 4647 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4648 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4649 4650 def _parse_equality(self) -> t.Optional[exp.Expression]: 4651 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4652 4653 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4654 return self._parse_tokens(self._parse_range, self.COMPARISON) 4655 4656 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4657 this = this or self._parse_bitwise() 4658 negate = self._match(TokenType.NOT) 4659 4660 if self._match_set(self.RANGE_PARSERS): 4661 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4662 if not expression: 4663 return this 4664 4665 this = expression 4666 elif self._match(TokenType.ISNULL): 4667 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4668 4669 # Postgres supports ISNULL and NOTNULL for conditions. 
4670 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4671 if self._match(TokenType.NOTNULL): 4672 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4673 this = self.expression(exp.Not, this=this) 4674 4675 if negate: 4676 this = self._negate_range(this) 4677 4678 if self._match(TokenType.IS): 4679 this = self._parse_is(this) 4680 4681 return this 4682 4683 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4684 if not this: 4685 return this 4686 4687 return self.expression(exp.Not, this=this) 4688 4689 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4690 index = self._index - 1 4691 negate = self._match(TokenType.NOT) 4692 4693 if self._match_text_seq("DISTINCT", "FROM"): 4694 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4695 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4696 4697 if self._match(TokenType.JSON): 4698 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4699 4700 if self._match_text_seq("WITH"): 4701 _with = True 4702 elif self._match_text_seq("WITHOUT"): 4703 _with = False 4704 else: 4705 _with = None 4706 4707 unique = self._match(TokenType.UNIQUE) 4708 self._match_text_seq("KEYS") 4709 expression: t.Optional[exp.Expression] = self.expression( 4710 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4711 ) 4712 else: 4713 expression = self._parse_primary() or self._parse_null() 4714 if not expression: 4715 self._retreat(index) 4716 return None 4717 4718 this = self.expression(exp.Is, this=this, expression=expression) 4719 return self.expression(exp.Not, this=this) if negate else this 4720 4721 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4722 unnest = self._parse_unnest(with_alias=False) 4723 if unnest: 4724 this = self.expression(exp.In, this=this, unnest=unnest) 4725 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4726 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4727 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4728 4729 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4730 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4731 else: 4732 this = self.expression(exp.In, this=this, expressions=expressions) 4733 4734 if matched_l_paren: 4735 self._match_r_paren(this) 4736 elif not self._match(TokenType.R_BRACKET, expression=this): 4737 self.raise_error("Expecting ]") 4738 else: 4739 this = self.expression(exp.In, this=this, field=self._parse_column()) 4740 4741 return this 4742 4743 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4744 low = self._parse_bitwise() 4745 self._match(TokenType.AND) 4746 high = self._parse_bitwise() 4747 return self.expression(exp.Between, this=this, low=low, high=high) 4748 4749 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4750 if not self._match(TokenType.ESCAPE): 4751 return this 4752 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4753 4754 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4755 index = self._index 4756 4757 if not self._match(TokenType.INTERVAL) and match_interval: 4758 return None 4759 4760 if self._match(TokenType.STRING, advance=False): 4761 this = self._parse_primary() 4762 else: 4763 this = self._parse_term() 4764 4765 if not 
this or ( 4766 isinstance(this, exp.Column) 4767 and not this.table 4768 and not this.this.quoted 4769 and this.name.upper() == "IS" 4770 ): 4771 self._retreat(index) 4772 return None 4773 4774 unit = self._parse_function() or ( 4775 not self._match(TokenType.ALIAS, advance=False) 4776 and self._parse_var(any_token=True, upper=True) 4777 ) 4778 4779 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4780 # each INTERVAL expression into this canonical form so it's easy to transpile 4781 if this and this.is_number: 4782 this = exp.Literal.string(this.to_py()) 4783 elif this and this.is_string: 4784 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4785 if parts and unit: 4786 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4787 unit = None 4788 self._retreat(self._index - 1) 4789 4790 if len(parts) == 1: 4791 this = exp.Literal.string(parts[0][0]) 4792 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4793 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4794 unit = self.expression( 4795 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4796 ) 4797 4798 interval = self.expression(exp.Interval, this=this, unit=unit) 4799 4800 index = self._index 4801 self._match(TokenType.PLUS) 4802 4803 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4804 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4805 return self.expression( 4806 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4807 ) 4808 4809 self._retreat(index) 4810 return interval 4811 4812 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4813 this = self._parse_term() 4814 4815 while True: 4816 if self._match_set(self.BITWISE): 4817 this = self.expression( 4818 self.BITWISE[self._prev.token_type], 4819 this=this, 4820 expression=self._parse_term(), 4821 ) 4822 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4823 this = self.expression( 4824 exp.DPipe, 4825 this=this, 4826 expression=self._parse_term(), 4827 safe=not self.dialect.STRICT_STRING_CONCAT, 4828 ) 4829 elif self._match(TokenType.DQMARK): 4830 this = self.expression( 4831 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4832 ) 4833 elif self._match_pair(TokenType.LT, TokenType.LT): 4834 this = self.expression( 4835 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4836 ) 4837 elif self._match_pair(TokenType.GT, TokenType.GT): 4838 this = self.expression( 4839 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4840 ) 4841 else: 4842 break 4843 4844 return this 4845 4846 def _parse_term(self) -> t.Optional[exp.Expression]: 4847 this = self._parse_factor() 4848 4849 while self._match_set(self.TERM): 4850 klass = self.TERM[self._prev.token_type] 4851 comments = self._prev_comments 4852 expression = self._parse_factor() 4853 4854 this = self.expression(klass, this=this, comments=comments, expression=expression) 4855 4856 if isinstance(this, exp.Collate): 4857 expr = this.expression 4858 4859 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4860 # fallback to Identifier / Var 4861 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4862 ident = expr.this 4863 if isinstance(ident, exp.Identifier): 4864 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4865 4866 return this 4867 4868 def _parse_factor(self) -> t.Optional[exp.Expression]: 4869 parse_method = 
self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # _parse_types() returns a Cast if we parsed BigQuery's inline constructor <type>(<values>), e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
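            # A concrete sketch of that Snowflake conversion (exact output may differ across
            # sqlglot versions):
            #     >>> import sqlglot
            #     >>> sqlglot.transpile("SELECT CAST(x AS DECIMAL)", read="snowflake")[0]
            #     'SELECT CAST(x AS DECIMAL(38, 0))'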
4938 if data_type.expressions and index2 - index > 1: 4939 self._retreat(index2) 4940 return self._parse_column_ops(data_type) 4941 4942 self._retreat(index) 4943 4944 if fallback_to_identifier: 4945 return self._parse_id_var() 4946 4947 this = self._parse_column() 4948 return this and self._parse_column_ops(this) 4949 4950 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4951 this = self._parse_type() 4952 if not this: 4953 return None 4954 4955 if isinstance(this, exp.Column) and not this.table: 4956 this = exp.var(this.name.upper()) 4957 4958 return self.expression( 4959 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4960 ) 4961 4962 def _parse_types( 4963 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4964 ) -> t.Optional[exp.Expression]: 4965 index = self._index 4966 4967 this: t.Optional[exp.Expression] = None 4968 prefix = self._match_text_seq("SYSUDTLIB", ".") 4969 4970 if not self._match_set(self.TYPE_TOKENS): 4971 identifier = allow_identifiers and self._parse_id_var( 4972 any_token=False, tokens=(TokenType.VAR,) 4973 ) 4974 if isinstance(identifier, exp.Identifier): 4975 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4976 4977 if len(tokens) != 1: 4978 self.raise_error("Unexpected identifier", self._prev) 4979 4980 if tokens[0].token_type in self.TYPE_TOKENS: 4981 self._prev = tokens[0] 4982 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4983 type_name = identifier.name 4984 4985 while self._match(TokenType.DOT): 4986 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4987 4988 this = exp.DataType.build(type_name, udt=True) 4989 else: 4990 self._retreat(self._index - 1) 4991 return None 4992 else: 4993 return None 4994 4995 type_token = self._prev.token_type 4996 4997 if type_token == TokenType.PSEUDO_TYPE: 4998 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4999 5000 if type_token == TokenType.OBJECT_IDENTIFIER: 5001 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5002 5003 # https://materialize.com/docs/sql/types/map/ 5004 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5005 key_type = self._parse_types( 5006 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5007 ) 5008 if not self._match(TokenType.FARROW): 5009 self._retreat(index) 5010 return None 5011 5012 value_type = self._parse_types( 5013 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5014 ) 5015 if not self._match(TokenType.R_BRACKET): 5016 self._retreat(index) 5017 return None 5018 5019 return exp.DataType( 5020 this=exp.DataType.Type.MAP, 5021 expressions=[key_type, value_type], 5022 nested=True, 5023 prefix=prefix, 5024 ) 5025 5026 nested = type_token in self.NESTED_TYPE_TOKENS 5027 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5028 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5029 expressions = None 5030 maybe_func = False 5031 5032 if self._match(TokenType.L_PAREN): 5033 if is_struct: 5034 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5035 elif nested: 5036 expressions = self._parse_csv( 5037 lambda: self._parse_types( 5038 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5039 ) 5040 ) 5041 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5042 this = expressions[0] 5043 this.set("nullable", True) 5044 self._match_r_paren() 5045 return this 5046 elif type_token in self.ENUM_TYPE_TOKENS: 5047 
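                # Enum values are parsed with _parse_equality because dialects such as
                # ClickHouse declare them as, e.g., Enum8('a' = 1, 'b' = 2)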
expressions = self._parse_csv(self._parse_equality) 5048 elif is_aggregate: 5049 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5050 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5051 ) 5052 if not func_or_ident: 5053 return None 5054 expressions = [func_or_ident] 5055 if self._match(TokenType.COMMA): 5056 expressions.extend( 5057 self._parse_csv( 5058 lambda: self._parse_types( 5059 check_func=check_func, 5060 schema=schema, 5061 allow_identifiers=allow_identifiers, 5062 ) 5063 ) 5064 ) 5065 else: 5066 expressions = self._parse_csv(self._parse_type_size) 5067 5068 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5069 if type_token == TokenType.VECTOR and len(expressions) == 2: 5070 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5071 5072 if not expressions or not self._match(TokenType.R_PAREN): 5073 self._retreat(index) 5074 return None 5075 5076 maybe_func = True 5077 5078 values: t.Optional[t.List[exp.Expression]] = None 5079 5080 if nested and self._match(TokenType.LT): 5081 if is_struct: 5082 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5083 else: 5084 expressions = self._parse_csv( 5085 lambda: self._parse_types( 5086 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5087 ) 5088 ) 5089 5090 if not self._match(TokenType.GT): 5091 self.raise_error("Expecting >") 5092 5093 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5094 values = self._parse_csv(self._parse_assignment) 5095 if not values and is_struct: 5096 values = None 5097 self._retreat(self._index - 1) 5098 else: 5099 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5100 5101 if type_token in self.TIMESTAMPS: 5102 if self._match_text_seq("WITH", "TIME", "ZONE"): 5103 maybe_func = False 5104 tz_type = ( 5105 exp.DataType.Type.TIMETZ 5106 if type_token in self.TIMES 5107 else exp.DataType.Type.TIMESTAMPTZ 5108 ) 5109 this = exp.DataType(this=tz_type, expressions=expressions) 5110 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5111 maybe_func = False 5112 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5113 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5114 maybe_func = False 5115 elif type_token == TokenType.INTERVAL: 5116 unit = self._parse_var(upper=True) 5117 if unit: 5118 if self._match_text_seq("TO"): 5119 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5120 5121 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5122 else: 5123 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5124 5125 if maybe_func and check_func: 5126 index2 = self._index 5127 peek = self._parse_string() 5128 5129 if not peek: 5130 self._retreat(index) 5131 return None 5132 5133 self._retreat(index2) 5134 5135 if not this: 5136 if self._match_text_seq("UNSIGNED"): 5137 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5138 if not unsigned_type_token: 5139 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5140 5141 type_token = unsigned_type_token or type_token 5142 5143 this = exp.DataType( 5144 this=exp.DataType.Type[type_token.value], 5145 expressions=expressions, 5146 nested=nested, 5147 prefix=prefix, 5148 ) 5149 5150 # Empty arrays/structs are allowed 5151 if values is not None: 5152 cls = exp.Struct if is_struct else exp.Array 5153 this = exp.cast(cls(expressions=values), this, copy=False) 5154 5155 elif 
expressions: 5156 this.set("expressions", expressions) 5157 5158 # https://materialize.com/docs/sql/types/list/#type-name 5159 while self._match(TokenType.LIST): 5160 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5161 5162 index = self._index 5163 5164 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5165 matched_array = self._match(TokenType.ARRAY) 5166 5167 while self._curr: 5168 datatype_token = self._prev.token_type 5169 matched_l_bracket = self._match(TokenType.L_BRACKET) 5170 5171 if (not matched_l_bracket and not matched_array) or ( 5172 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5173 ): 5174 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5175 # not to be confused with the fixed size array parsing 5176 break 5177 5178 matched_array = False 5179 values = self._parse_csv(self._parse_assignment) or None 5180 if ( 5181 values 5182 and not schema 5183 and ( 5184 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5185 ) 5186 ): 5187 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5188 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5189 self._retreat(index) 5190 break 5191 5192 this = exp.DataType( 5193 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5194 ) 5195 self._match(TokenType.R_BRACKET) 5196 5197 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5198 converter = self.TYPE_CONVERTERS.get(this.this) 5199 if converter: 5200 this = converter(t.cast(exp.DataType, this)) 5201 5202 return this 5203 5204 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5205 index = self._index 5206 5207 if ( 5208 self._curr 5209 and self._next 5210 and self._curr.token_type in self.TYPE_TOKENS 5211 and self._next.token_type in self.TYPE_TOKENS 5212 ): 5213 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5214 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5215 this = self._parse_id_var() 5216 else: 5217 this = ( 5218 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5219 or self._parse_id_var() 5220 ) 5221 5222 self._match(TokenType.COLON) 5223 5224 if ( 5225 type_required 5226 and not isinstance(this, exp.DataType) 5227 and not self._match_set(self.TYPE_TOKENS, advance=False) 5228 ): 5229 self._retreat(index) 5230 return self._parse_types() 5231 5232 return self._parse_column_def(this) 5233 5234 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5235 if not self._match_text_seq("AT", "TIME", "ZONE"): 5236 return this 5237 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5238 5239 def _parse_column(self) -> t.Optional[exp.Expression]: 5240 this = self._parse_column_reference() 5241 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5242 5243 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5244 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5245 5246 return column 5247 5248 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5249 this = self._parse_field() 5250 if ( 5251 not this 5252 and self._match(TokenType.VALUES, advance=False) 5253 and self.VALUES_FOLLOWED_BY_PAREN 5254 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5255 ): 5256 this = self._parse_id_var() 5257 5258 if isinstance(this, exp.Identifier): 5259 # We bubble up comments from the Identifier to the Column 5260 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5261 5262 return this 5263 5264 def _parse_colon_as_variant_extract( 5265 self, this: t.Optional[exp.Expression] 5266 ) -> t.Optional[exp.Expression]: 5267 casts = [] 5268 json_path = [] 5269 escape = None 5270 5271 while self._match(TokenType.COLON): 5272 start_index = self._index 5273 5274 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5275 path = self._parse_column_ops( 5276 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5277 ) 5278 5279 # The cast :: operator has a lower precedence than the extraction operator :, so 5280 # we rearrange the AST appropriately to avoid casting the JSON path 5281 while isinstance(path, exp.Cast): 5282 casts.append(path.to) 5283 path = path.this 5284 5285 if casts: 5286 dcolon_offset = next( 5287 i 5288 for i, t in enumerate(self._tokens[start_index:]) 5289 if t.token_type == TokenType.DCOLON 5290 ) 5291 end_token = self._tokens[start_index + dcolon_offset - 1] 5292 else: 5293 end_token = self._prev 5294 5295 if path: 5296 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5297 # it'll roundtrip to a string literal in GET_PATH 5298 if isinstance(path, exp.Identifier) and path.quoted: 5299 escape = True 5300 5301 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5302 5303 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5304 # Databricks transforms it back to the colon/dot notation 5305 if json_path: 5306 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5307 5308 if json_path_expr: 5309 json_path_expr.set("escape", escape) 5310 5311 this = self.expression( 5312 exp.JSONExtract, 5313 this=this, 5314 expression=json_path_expr, 5315 variant_extract=True, 5316 ) 5317 5318 while casts: 5319 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5320 5321 return this 5322 5323 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5324 return self._parse_types() 5325 5326 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5327 this = self._parse_bracket(this) 5328 5329 while self._match_set(self.COLUMN_OPERATORS): 5330 op_token = self._prev.token_type 5331 op = self.COLUMN_OPERATORS.get(op_token) 5332 5333 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5334 field = self._parse_dcolon() 5335 if not field: 5336 self.raise_error("Expected type") 5337 elif op and self._curr: 5338 field = self._parse_column_reference() or self._parse_bracket() 5339 else: 5340 field = self._parse_field(any_token=True, anonymous_func=True) 5341 5342 if isinstance(field, (exp.Func, exp.Window)) and this: 5343 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5344 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5345 this = exp.replace_tree( 5346 this, 5347 lambda n: ( 5348 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5349 if n.table 5350 else n.this 5351 ) 5352 if isinstance(n, exp.Column) 5353 else n, 5354 ) 5355 5356 if op: 5357 this = op(self, this, field) 5358 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5359 this = self.expression( 5360 exp.Column, 5361 comments=this.comments, 5362 this=field, 5363 table=this.this, 5364 db=this.args.get("table"), 5365 catalog=this.args.get("db"), 5366 ) 5367 elif isinstance(field, exp.Window): 5368 # Move the exp.Dot's to the window's function 5369 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5370 field.set("this", window_func) 5371 this = field 5372 else: 5373 this = self.expression(exp.Dot, this=this, expression=field) 5374 5375 if field and field.comments: 5376 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5377 5378 this = self._parse_bracket(this) 5379 5380 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5381 5382 def _parse_primary(self) -> t.Optional[exp.Expression]: 5383 if self._match_set(self.PRIMARY_PARSERS): 5384 token_type = self._prev.token_type 5385 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5386 5387 if token_type == TokenType.STRING: 5388 expressions = [primary] 5389 while self._match(TokenType.STRING): 5390 expressions.append(exp.Literal.string(self._prev.text)) 5391 5392 if len(expressions) > 1: 5393 return self.expression(exp.Concat, expressions=expressions) 5394 5395 return primary 5396 5397 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5398 return exp.Literal.number(f"0.{self._prev.text}") 
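        # Doctest-style sketch of the two special cases above (default dialect; outputs are
        # illustrative):
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one("SELECT 'a' 'b'").selects[0].sql()
        #     "CONCAT('a', 'b')"
        #     >>> sqlglot.parse_one("SELECT .5").selects[0].sql()
        #     '0.5'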
5399 5400 if self._match(TokenType.L_PAREN): 5401 comments = self._prev_comments 5402 query = self._parse_select() 5403 5404 if query: 5405 expressions = [query] 5406 else: 5407 expressions = self._parse_expressions() 5408 5409 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5410 5411 if not this and self._match(TokenType.R_PAREN, advance=False): 5412 this = self.expression(exp.Tuple) 5413 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5414 this = self._parse_subquery(this=this, parse_alias=False) 5415 elif isinstance(this, exp.Subquery): 5416 this = self._parse_subquery( 5417 this=self._parse_set_operations(this), parse_alias=False 5418 ) 5419 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5420 this = self.expression(exp.Tuple, expressions=expressions) 5421 else: 5422 this = self.expression(exp.Paren, this=this) 5423 5424 if this: 5425 this.add_comments(comments) 5426 5427 self._match_r_paren(expression=this) 5428 return this 5429 5430 return None 5431 5432 def _parse_field( 5433 self, 5434 any_token: bool = False, 5435 tokens: t.Optional[t.Collection[TokenType]] = None, 5436 anonymous_func: bool = False, 5437 ) -> t.Optional[exp.Expression]: 5438 if anonymous_func: 5439 field = ( 5440 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5441 or self._parse_primary() 5442 ) 5443 else: 5444 field = self._parse_primary() or self._parse_function( 5445 anonymous=anonymous_func, any_token=any_token 5446 ) 5447 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5448 5449 def _parse_function( 5450 self, 5451 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5452 anonymous: bool = False, 5453 optional_parens: bool = True, 5454 any_token: bool = False, 5455 ) -> t.Optional[exp.Expression]: 5456 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5457 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5458 fn_syntax = False 5459 if ( 5460 self._match(TokenType.L_BRACE, advance=False) 5461 and self._next 5462 and self._next.text.upper() == "FN" 5463 ): 5464 self._advance(2) 5465 fn_syntax = True 5466 5467 func = self._parse_function_call( 5468 functions=functions, 5469 anonymous=anonymous, 5470 optional_parens=optional_parens, 5471 any_token=any_token, 5472 ) 5473 5474 if fn_syntax: 5475 self._match(TokenType.R_BRACE) 5476 5477 return func 5478 5479 def _parse_function_call( 5480 self, 5481 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5482 anonymous: bool = False, 5483 optional_parens: bool = True, 5484 any_token: bool = False, 5485 ) -> t.Optional[exp.Expression]: 5486 if not self._curr: 5487 return None 5488 5489 comments = self._curr.comments 5490 token_type = self._curr.token_type 5491 this = self._curr.text 5492 upper = this.upper() 5493 5494 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5495 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5496 self._advance() 5497 return self._parse_window(parser(self)) 5498 5499 if not self._next or self._next.token_type != TokenType.L_PAREN: 5500 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5501 self._advance() 5502 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5503 5504 return None 5505 5506 if any_token: 5507 if token_type in self.RESERVED_TOKENS: 5508 return None 5509 elif token_type not in self.FUNC_TOKENS: 5510 return None 5511 5512 self._advance(2) 5513 5514 parser = self.FUNCTION_PARSERS.get(upper) 5515 if parser and not anonymous: 5516 this = 
parser(self) 5517 else: 5518 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5519 5520 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5521 this = self.expression( 5522 subquery_predicate, comments=comments, this=self._parse_select() 5523 ) 5524 self._match_r_paren() 5525 return this 5526 5527 if functions is None: 5528 functions = self.FUNCTIONS 5529 5530 function = functions.get(upper) 5531 known_function = function and not anonymous 5532 5533 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5534 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5535 5536 post_func_comments = self._curr and self._curr.comments 5537 if known_function and post_func_comments: 5538 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5539 # call we'll construct it as exp.Anonymous, even if it's "known" 5540 if any( 5541 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5542 for comment in post_func_comments 5543 ): 5544 known_function = False 5545 5546 if alias and known_function: 5547 args = self._kv_to_prop_eq(args) 5548 5549 if known_function: 5550 func_builder = t.cast(t.Callable, function) 5551 5552 if "dialect" in func_builder.__code__.co_varnames: 5553 func = func_builder(args, dialect=self.dialect) 5554 else: 5555 func = func_builder(args) 5556 5557 func = self.validate_expression(func, args) 5558 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5559 func.meta["name"] = this 5560 5561 this = func 5562 else: 5563 if token_type == TokenType.IDENTIFIER: 5564 this = exp.Identifier(this=this, quoted=True) 5565 this = self.expression(exp.Anonymous, this=this, expressions=args) 5566 5567 if isinstance(this, exp.Expression): 5568 this.add_comments(comments) 5569 5570 self._match_r_paren(this) 5571 return self._parse_window(this) 5572 5573 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5574 return expression 5575 5576 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5577 transformed = [] 5578 5579 for index, e in enumerate(expressions): 5580 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5581 if isinstance(e, exp.Alias): 5582 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5583 5584 if not isinstance(e, exp.PropertyEQ): 5585 e = self.expression( 5586 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5587 ) 5588 5589 if isinstance(e.this, exp.Column): 5590 e.this.replace(e.this.this) 5591 else: 5592 e = self._to_prop_eq(e, index) 5593 5594 transformed.append(e) 5595 5596 return transformed 5597 5598 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5599 return self._parse_statement() 5600 5601 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5602 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5603 5604 def _parse_user_defined_function( 5605 self, kind: t.Optional[TokenType] = None 5606 ) -> t.Optional[exp.Expression]: 5607 this = self._parse_id_var() 5608 5609 while self._match(TokenType.DOT): 5610 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5611 5612 if not self._match(TokenType.L_PAREN): 5613 return this 5614 5615 expressions = self._parse_csv(self._parse_function_parameter) 5616 self._match_r_paren() 5617 return self.expression( 5618 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5619 ) 5620 5621 def _parse_introducer(self, 
token: Token) -> exp.Introducer | exp.Identifier: 5622 literal = self._parse_primary() 5623 if literal: 5624 return self.expression(exp.Introducer, this=token.text, expression=literal) 5625 5626 return self.expression(exp.Identifier, this=token.text) 5627 5628 def _parse_session_parameter(self) -> exp.SessionParameter: 5629 kind = None 5630 this = self._parse_id_var() or self._parse_primary() 5631 5632 if this and self._match(TokenType.DOT): 5633 kind = this.name 5634 this = self._parse_var() or self._parse_primary() 5635 5636 return self.expression(exp.SessionParameter, this=this, kind=kind) 5637 5638 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5639 return self._parse_id_var() 5640 5641 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5642 index = self._index 5643 5644 if self._match(TokenType.L_PAREN): 5645 expressions = t.cast( 5646 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5647 ) 5648 5649 if not self._match(TokenType.R_PAREN): 5650 self._retreat(index) 5651 else: 5652 expressions = [self._parse_lambda_arg()] 5653 5654 if self._match_set(self.LAMBDAS): 5655 return self.LAMBDAS[self._prev.token_type](self, expressions) 5656 5657 self._retreat(index) 5658 5659 this: t.Optional[exp.Expression] 5660 5661 if self._match(TokenType.DISTINCT): 5662 this = self.expression( 5663 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5664 ) 5665 else: 5666 this = self._parse_select_or_expression(alias=alias) 5667 5668 return self._parse_limit( 5669 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5670 ) 5671 5672 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5673 index = self._index 5674 if not self._match(TokenType.L_PAREN): 5675 return this 5676 5677 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5678 # expr can be of both types 5679 if self._match_set(self.SELECT_START_TOKENS): 5680 self._retreat(index) 5681 return this 5682 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5683 self._match_r_paren() 5684 return self.expression(exp.Schema, this=this, expressions=args) 5685 5686 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5687 return self._parse_column_def(self._parse_field(any_token=True)) 5688 5689 def _parse_column_def( 5690 self, this: t.Optional[exp.Expression], computed_column: bool = True 5691 ) -> t.Optional[exp.Expression]: 5692 # column defs are not really columns, they're identifiers 5693 if isinstance(this, exp.Column): 5694 this = this.this 5695 5696 if not computed_column: 5697 self._match(TokenType.ALIAS) 5698 5699 kind = self._parse_types(schema=True) 5700 5701 if self._match_text_seq("FOR", "ORDINALITY"): 5702 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5703 5704 constraints: t.List[exp.Expression] = [] 5705 5706 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5707 ("ALIAS", "MATERIALIZED") 5708 ): 5709 persisted = self._prev.text.upper() == "MATERIALIZED" 5710 constraint_kind = exp.ComputedColumnConstraint( 5711 this=self._parse_assignment(), 5712 persisted=persisted or self._match_text_seq("PERSISTED"), 5713 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5714 ) 5715 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5716 elif ( 5717 kind 5718 and self._match(TokenType.ALIAS, advance=False) 5719 and ( 5720 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5721 or (self._next and self._next.token_type == TokenType.L_PAREN) 5722 ) 5723 ): 5724 self._advance() 5725 constraints.append( 5726 self.expression( 5727 exp.ColumnConstraint, 5728 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5729 ) 5730 ) 5731 5732 while True: 5733 constraint = self._parse_column_constraint() 5734 if not constraint: 5735 break 5736 constraints.append(constraint) 5737 5738 if not kind and not constraints: 5739 return this 5740 5741 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5742 5743 def _parse_auto_increment( 5744 self, 5745 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5746 start = None 5747 increment = None 5748 5749 if self._match(TokenType.L_PAREN, advance=False): 5750 args = self._parse_wrapped_csv(self._parse_bitwise) 5751 start = seq_get(args, 0) 5752 increment = seq_get(args, 1) 5753 elif self._match_text_seq("START"): 5754 start = self._parse_bitwise() 5755 self._match_text_seq("INCREMENT") 5756 increment = self._parse_bitwise() 5757 5758 if start and increment: 5759 return exp.GeneratedAsIdentityColumnConstraint( 5760 start=start, increment=increment, this=False 5761 ) 5762 5763 return exp.AutoIncrementColumnConstraint() 5764 5765 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5766 if not self._match_text_seq("REFRESH"): 5767 self._retreat(self._index - 1) 5768 return None 5769 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5770 5771 def _parse_compress(self) -> exp.CompressColumnConstraint: 5772 if self._match(TokenType.L_PAREN, advance=False): 5773 return self.expression( 5774 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5775 ) 5776 5777 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5778 5779 def 
_parse_generated_as_identity( 5780 self, 5781 ) -> ( 5782 exp.GeneratedAsIdentityColumnConstraint 5783 | exp.ComputedColumnConstraint 5784 | exp.GeneratedAsRowColumnConstraint 5785 ): 5786 if self._match_text_seq("BY", "DEFAULT"): 5787 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5788 this = self.expression( 5789 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5790 ) 5791 else: 5792 self._match_text_seq("ALWAYS") 5793 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5794 5795 self._match(TokenType.ALIAS) 5796 5797 if self._match_text_seq("ROW"): 5798 start = self._match_text_seq("START") 5799 if not start: 5800 self._match(TokenType.END) 5801 hidden = self._match_text_seq("HIDDEN") 5802 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5803 5804 identity = self._match_text_seq("IDENTITY") 5805 5806 if self._match(TokenType.L_PAREN): 5807 if self._match(TokenType.START_WITH): 5808 this.set("start", self._parse_bitwise()) 5809 if self._match_text_seq("INCREMENT", "BY"): 5810 this.set("increment", self._parse_bitwise()) 5811 if self._match_text_seq("MINVALUE"): 5812 this.set("minvalue", self._parse_bitwise()) 5813 if self._match_text_seq("MAXVALUE"): 5814 this.set("maxvalue", self._parse_bitwise()) 5815 5816 if self._match_text_seq("CYCLE"): 5817 this.set("cycle", True) 5818 elif self._match_text_seq("NO", "CYCLE"): 5819 this.set("cycle", False) 5820 5821 if not identity: 5822 this.set("expression", self._parse_range()) 5823 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5824 args = self._parse_csv(self._parse_bitwise) 5825 this.set("start", seq_get(args, 0)) 5826 this.set("increment", seq_get(args, 1)) 5827 5828 self._match_r_paren() 5829 5830 return this 5831 5832 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5833 self._match_text_seq("LENGTH") 5834 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5835 5836 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5837 if self._match_text_seq("NULL"): 5838 return self.expression(exp.NotNullColumnConstraint) 5839 if self._match_text_seq("CASESPECIFIC"): 5840 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5841 if self._match_text_seq("FOR", "REPLICATION"): 5842 return self.expression(exp.NotForReplicationColumnConstraint) 5843 5844 # Unconsume the `NOT` token 5845 self._retreat(self._index - 1) 5846 return None 5847 5848 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5849 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5850 5851 procedure_option_follows = ( 5852 self._match(TokenType.WITH, advance=False) 5853 and self._next 5854 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5855 ) 5856 5857 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5858 return self.expression( 5859 exp.ColumnConstraint, 5860 this=this, 5861 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5862 ) 5863 5864 return this 5865 5866 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5867 if not self._match(TokenType.CONSTRAINT): 5868 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5869 5870 return self.expression( 5871 exp.Constraint, 5872 this=self._parse_id_var(), 5873 expressions=self._parse_unnamed_constraints(), 5874 ) 5875 5876 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5877 constraints = [] 5878 while True: 5879 
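            # Greedily consume constraint kinds until neither a known constraint nor a
            # function-shaped constraint follows; the collected list becomes the expressions
            # of the enclosing exp.Constraint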
    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()
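    # Illustrative sketch: _parse_foreign_key above collects each ON DELETE /
    # ON UPDATE action into a keyword arg of exp.ForeignKey, so (assuming the
    # default dialect) one would expect roughly:
    #
    #   >>> fk = sqlglot.parse_one(
    #   ...     "CREATE TABLE c (a INT, FOREIGN KEY (a) REFERENCES p (b) ON DELETE CASCADE)"
    #   ... ).find(exp.ForeignKey)
    #   >>> fk.args.get("delete")
    #   'CASCADE'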
    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression
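    # Illustrative sketch: for dialects that enable ODBC_DATETIME_LITERALS
    # (T-SQL is assumed here), `{d '...'}` should come out as the same node
    # as DATE('...'), per the docstring above:
    #
    #   >>> sqlglot.parse_one("SELECT {d '2024-01-31'}", read="tsql").find(exp.Date) is not None
    #   True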
    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)
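    # Illustrative sketch: _parse_extract above accepts both the standard
    # `EXTRACT(part FROM expr)` form and a comma-separated variant used by
    # some dialects (Snowflake is assumed for the second doctest):
    #
    #   >>> sqlglot.parse_one("SELECT EXTRACT(YEAR FROM d)").find(exp.Extract) is not None
    #   True
    #   >>> sqlglot.parse_one("SELECT EXTRACT(year, d)", read="snowflake").find(exp.Extract) is not None
    #   True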
    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )
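    # Illustrative sketch: per _parse_cast above, casting a string to a
    # temporal type with a FORMAT clause is rewritten into a StrToDate /
    # StrToTime node instead of a plain Cast. Teradata syntax is assumed:
    #
    #   >>> q = sqlglot.parse_one(
    #   ...     "SELECT CAST('2024-01-31' AS DATE FORMAT 'YYYY-MM-DD')", read="teradata"
    #   ... )
    #   >>> q.find(exp.StrToDate) is not None
    #   True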
    def _parse_string_agg(self) -> exp.GroupConcat:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())

            namespaces.append(self.expression(exp.XMLNamespace, this=uri))

            if not self._match(TokenType.COMMA):
                break

        return namespaces
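    # Illustrative sketch: because STRING_AGG's trailing ORDER BY is folded
    # into the GroupConcat node above, transpiling to dialects that spell the
    # same thing as GROUP_CONCAT should work roughly like this (the exact
    # output string is an assumption):
    #
    #   >>> sqlglot.transpile(
    #   ...     "SELECT STRING_AGG(x, ',' ORDER BY y) FROM t", read="postgres", write="mysql"
    #   ... )[0]
    #   "SELECT GROUP_CONCAT(x ORDER BY y SEPARATOR ',') FROM t"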
6348 """ 6349 args = self._parse_csv(self._parse_assignment) 6350 6351 if len(args) < 3: 6352 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6353 6354 expression, *expressions = args 6355 if not expression: 6356 return None 6357 6358 ifs = [] 6359 for search, result in zip(expressions[::2], expressions[1::2]): 6360 if not search or not result: 6361 return None 6362 6363 if isinstance(search, exp.Literal): 6364 ifs.append( 6365 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6366 ) 6367 elif isinstance(search, exp.Null): 6368 ifs.append( 6369 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6370 ) 6371 else: 6372 cond = exp.or_( 6373 exp.EQ(this=expression.copy(), expression=search), 6374 exp.and_( 6375 exp.Is(this=expression.copy(), expression=exp.Null()), 6376 exp.Is(this=search.copy(), expression=exp.Null()), 6377 copy=False, 6378 ), 6379 copy=False, 6380 ) 6381 ifs.append(exp.If(this=cond, true=result)) 6382 6383 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6384 6385 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6386 self._match_text_seq("KEY") 6387 key = self._parse_column() 6388 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6389 self._match_text_seq("VALUE") 6390 value = self._parse_bitwise() 6391 6392 if not key and not value: 6393 return None 6394 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6395 6396 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6397 if not this or not self._match_text_seq("FORMAT", "JSON"): 6398 return this 6399 6400 return self.expression(exp.FormatJson, this=this) 6401 6402 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6403 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6404 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6405 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6406 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6407 else: 6408 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6409 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6410 6411 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6412 6413 if not empty and not error and not null: 6414 return None 6415 6416 return self.expression( 6417 exp.OnCondition, 6418 empty=empty, 6419 error=error, 6420 null=null, 6421 ) 6422 6423 def _parse_on_handling( 6424 self, on: str, *values: str 6425 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6426 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6427 for value in values: 6428 if self._match_text_seq(value, "ON", on): 6429 return f"{value} ON {on}" 6430 6431 index = self._index 6432 if self._match(TokenType.DEFAULT): 6433 default_value = self._parse_bitwise() 6434 if self._match_text_seq("ON", on): 6435 return default_value 6436 6437 self._retreat(index) 6438 6439 return None 6440 6441 @t.overload 6442 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6443 6444 @t.overload 6445 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
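    # Illustrative sketch: _parse_match_against stores the search modifier as
    # a plain string, so for MySQL full-text syntax one would expect:
    #
    #   >>> ma = sqlglot.parse_one(
    #   ...     "SELECT * FROM t WHERE MATCH(a, b) AGAINST('hi' IN BOOLEAN MODE)", read="mysql"
    #   ... ).find(exp.MatchAgainst)
    #   >>> ma.args.get("modifier")
    #   'IN BOOLEAN MODE'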
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)
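    # Illustrative sketch: _parse_substring above normalizes the Postgres
    # `FROM ... FOR ...` form into ordinary Substring arguments, so the
    # default generator should render it positionally (roughly):
    #
    #   >>> sqlglot.parse_one("SELECT SUBSTRING(s FROM 2 FOR 3)", read="postgres").sql()
    #   'SELECT SUBSTRING(s, 2, 3)'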
    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
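    # Illustrative sketch: _parse_window_spec above returns UNBOUNDED /
    # CURRENT ROW as bare strings, which end up in the WindowSpec args:
    #
    #   >>> spec = sqlglot.parse_one(
    #   ...     "SELECT SUM(x) OVER (ORDER BY d ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    #   ... ).find(exp.WindowSpec)
    #   >>> spec.args["start"], spec.args["start_side"]
    #   ('UNBOUNDED', 'PRECEDING')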
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)

        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")

        parse_result = parse_method()

        if wrapped:
            self._match_r_paren()

        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))

            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
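    # Illustrative sketch: transaction statements parse into dedicated nodes,
    # e.g. a savepoint rollback (assuming the default dialect):
    #
    #   >>> rb = sqlglot.parse_one("ROLLBACK TO SAVEPOINT sp1")
    #   >>> isinstance(rb, exp.Rollback), rb.args["savepoint"].name  # roughly (True, 'sp1')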
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)
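    # Illustrative sketch: _parse_add_column above attaches MySQL/Databricks
    # style FIRST/AFTER placement as a ColumnPosition on the new ColumnDef
    # (the doctest below is an assumption about MySQL support):
    #
    #   >>> alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT AFTER b", read="mysql")
    #   >>> alter.find(exp.ColumnPosition) is not None
    #   True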
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
alter_set.set("file_format", [self._parse_field()]) 7206 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7207 alter_set.set("file_format", self._parse_wrapped_options()) 7208 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7209 alter_set.set("copy_options", self._parse_wrapped_options()) 7210 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7211 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7212 else: 7213 if self._match_text_seq("SERDE"): 7214 alter_set.set("serde", self._parse_field()) 7215 7216 alter_set.set("expressions", [self._parse_properties()]) 7217 7218 return alter_set 7219 7220 def _parse_alter(self) -> exp.Alter | exp.Command: 7221 start = self._prev 7222 7223 alter_token = self._match_set(self.ALTERABLES) and self._prev 7224 if not alter_token: 7225 return self._parse_as_command(start) 7226 7227 exists = self._parse_exists() 7228 only = self._match_text_seq("ONLY") 7229 this = self._parse_table(schema=True) 7230 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7231 7232 if self._next: 7233 self._advance() 7234 7235 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7236 if parser: 7237 actions = ensure_list(parser(self)) 7238 not_valid = self._match_text_seq("NOT", "VALID") 7239 options = self._parse_csv(self._parse_property) 7240 7241 if not self._curr and actions: 7242 return self.expression( 7243 exp.Alter, 7244 this=this, 7245 kind=alter_token.text.upper(), 7246 exists=exists, 7247 actions=actions, 7248 only=only, 7249 options=options, 7250 cluster=cluster, 7251 not_valid=not_valid, 7252 ) 7253 7254 return self._parse_as_command(start) 7255 7256 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7257 start = self._prev 7258 # https://duckdb.org/docs/sql/statements/analyze 7259 if not self._curr: 7260 return self.expression(exp.Analyze) 7261 7262 options = [] 7263 while self._match_texts(self.ANALYZE_STYLES): 7264 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7265 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7266 else: 7267 options.append(self._prev.text.upper()) 7268 7269 this: t.Optional[exp.Expression] = None 7270 inner_expression: t.Optional[exp.Expression] = None 7271 7272 kind = self._curr and self._curr.text.upper() 7273 7274 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7275 this = self._parse_table_parts() 7276 elif self._match_text_seq("TABLES"): 7277 if self._match_set((TokenType.FROM, TokenType.IN)): 7278 kind = f"{kind} {self._prev.text.upper()}" 7279 this = self._parse_table(schema=True, is_db_reference=True) 7280 elif self._match_text_seq("DATABASE"): 7281 this = self._parse_table(schema=True, is_db_reference=True) 7282 elif self._match_text_seq("CLUSTER"): 7283 this = self._parse_table() 7284 # Try matching inner expr keywords before fallback to parse table. 
    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None

        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None
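    # Illustrative sketch: with Spark-style syntax (assumed below), the
    # COMPUTE branch above should produce an AnalyzeStatistics node nested
    # inside exp.Analyze:
    #
    #   >>> stats = sqlglot.parse_one(
    #   ...     "ANALYZE TABLE t COMPUTE STATISTICS NOSCAN", read="spark"
    #   ... ).find(exp.AnalyzeStatistics)
    #   >>> stats.args.get("kind"), stats.args.get("this")  # roughly ('COMPUTE', 'NOSCAN')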
    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")

            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
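    # Illustrative sketch: _parse_merge delegates the WHEN branches to
    # _parse_when_matched below, which records whether each branch matched:
    #
    #   >>> m = sqlglot.parse_one(
    #   ...     "MERGE INTO t USING s ON t.id = s.id "
    #   ...     "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #   ...     "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #   ... )
    #   >>> [w.args.get("matched") for w in m.find_all(exp.When)]  # roughly [True, False]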
self._parse_csv(self._parse_equality), 7486 ) 7487 elif self._match(TokenType.DELETE): 7488 then = self.expression(exp.Var, this=self._prev.text) 7489 else: 7490 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7491 7492 whens.append( 7493 self.expression( 7494 exp.When, 7495 matched=matched, 7496 source=source, 7497 condition=condition, 7498 then=then, 7499 ) 7500 ) 7501 return self.expression(exp.Whens, expressions=whens) 7502 7503 def _parse_show(self) -> t.Optional[exp.Expression]: 7504 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7505 if parser: 7506 return parser(self) 7507 return self._parse_as_command(self._prev) 7508 7509 def _parse_set_item_assignment( 7510 self, kind: t.Optional[str] = None 7511 ) -> t.Optional[exp.Expression]: 7512 index = self._index 7513 7514 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7515 return self._parse_set_transaction(global_=kind == "GLOBAL") 7516 7517 left = self._parse_primary() or self._parse_column() 7518 assignment_delimiter = self._match_texts(("=", "TO")) 7519 7520 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7521 self._retreat(index) 7522 return None 7523 7524 right = self._parse_statement() or self._parse_id_var() 7525 if isinstance(right, (exp.Column, exp.Identifier)): 7526 right = exp.var(right.name) 7527 7528 this = self.expression(exp.EQ, this=left, expression=right) 7529 return self.expression(exp.SetItem, this=this, kind=kind) 7530 7531 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7532 self._match_text_seq("TRANSACTION") 7533 characteristics = self._parse_csv( 7534 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7535 ) 7536 return self.expression( 7537 exp.SetItem, 7538 expressions=characteristics, 7539 kind="TRANSACTION", 7540 **{"global": global_}, # type: ignore 7541 ) 7542 7543 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7544 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7545 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7546 7547 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7548 index = self._index 7549 set_ = self.expression( 7550 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7551 ) 7552 7553 if self._curr: 7554 self._retreat(index) 7555 return self._parse_as_command(self._prev) 7556 7557 return set_ 7558 7559 def _parse_var_from_options( 7560 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7561 ) -> t.Optional[exp.Var]: 7562 start = self._curr 7563 if not start: 7564 return None 7565 7566 option = start.text.upper() 7567 continuations = options.get(option) 7568 7569 index = self._index 7570 self._advance() 7571 for keywords in continuations or []: 7572 if isinstance(keywords, str): 7573 keywords = (keywords,) 7574 7575 if self._match_text_seq(*keywords): 7576 option = f"{option} {' '.join(keywords)}" 7577 break 7578 else: 7579 if continuations or continuations is None: 7580 if raise_unmatched: 7581 self.raise_error(f"Unknown option {option}") 7582 7583 self._retreat(index) 7584 return None 7585 7586 return exp.var(option) 7587 7588 def _parse_as_command(self, start: Token) -> exp.Command: 7589 while self._curr: 7590 self._advance() 7591 text = self._find_sql(start, self._prev) 7592 size = len(start.text) 7593 self._warn_unsupported() 7594 return exp.Command(this=text[:size], expression=text[size:]) 7595 7596 def _parse_dict_property(self, 
this: str) -> exp.DictProperty: 7597 settings = [] 7598 7599 self._match_l_paren() 7600 kind = self._parse_id_var() 7601 7602 if self._match(TokenType.L_PAREN): 7603 while True: 7604 key = self._parse_id_var() 7605 value = self._parse_primary() 7606 if not key and value is None: 7607 break 7608 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7609 self._match(TokenType.R_PAREN) 7610 7611 self._match_r_paren() 7612 7613 return self.expression( 7614 exp.DictProperty, 7615 this=this, 7616 kind=kind.this if kind else None, 7617 settings=settings, 7618 ) 7619 7620 def _parse_dict_range(self, this: str) -> exp.DictRange: 7621 self._match_l_paren() 7622 has_min = self._match_text_seq("MIN") 7623 if has_min: 7624 min = self._parse_var() or self._parse_primary() 7625 self._match_text_seq("MAX") 7626 max = self._parse_var() or self._parse_primary() 7627 else: 7628 max = self._parse_var() or self._parse_primary() 7629 min = exp.Literal.number(0) 7630 self._match_r_paren() 7631 return self.expression(exp.DictRange, this=this, min=min, max=max) 7632 7633 def _parse_comprehension( 7634 self, this: t.Optional[exp.Expression] 7635 ) -> t.Optional[exp.Comprehension]: 7636 index = self._index 7637 expression = self._parse_column() 7638 if not self._match(TokenType.IN): 7639 self._retreat(index - 1) 7640 return None 7641 iterator = self._parse_column() 7642 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7643 return self.expression( 7644 exp.Comprehension, 7645 this=this, 7646 expression=expression, 7647 iterator=iterator, 7648 condition=condition, 7649 ) 7650 7651 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7652 if self._match(TokenType.HEREDOC_STRING): 7653 return self.expression(exp.Heredoc, this=self._prev.text) 7654 7655 if not self._match_text_seq("$"): 7656 return None 7657 7658 tags = ["$"] 7659 tag_text = None 7660 7661 if self._is_connected(): 7662 self._advance() 7663 tags.append(self._prev.text.upper()) 7664 else: 7665 self.raise_error("No closing $ found") 7666 7667 if tags[-1] != "$": 7668 if self._is_connected() and self._match_text_seq("$"): 7669 tag_text = tags[-1] 7670 tags.append("$") 7671 else: 7672 self.raise_error("No closing $ found") 7673 7674 heredoc_start = self._curr 7675 7676 while self._curr: 7677 if self._match_text_seq(*tags, advance=False): 7678 this = self._find_sql(heredoc_start, self._prev) 7679 self._advance(len(tags)) 7680 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7681 7682 self._advance() 7683 7684 self.raise_error(f"No closing {''.join(tags)} found") 7685 return None 7686 7687 def _find_parser( 7688 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7689 ) -> t.Optional[t.Callable]: 7690 if not self._curr: 7691 return None 7692 7693 index = self._index 7694 this = [] 7695 while True: 7696 # The current token might be multiple words 7697 curr = self._curr.text.upper() 7698 key = curr.split(" ") 7699 this.append(curr) 7700 7701 self._advance() 7702 result, trie = in_trie(trie, key) 7703 if result == TrieResult.FAILED: 7704 break 7705 7706 if result == TrieResult.EXISTS: 7707 subparser = parsers[" ".join(this)] 7708 return subparser 7709 7710 self._retreat(index) 7711 return None 7712 7713 def _match(self, token_type, advance=True, expression=None): 7714 if not self._curr: 7715 return None 7716 7717 if self._curr.token_type == token_type: 7718 if advance: 7719 self._advance() 7720 self._add_comments(expression) 7721 return True 7722 7723 return None 7724 7725 def _match_set(self, types, 
advance=True): 7726 if not self._curr: 7727 return None 7728 7729 if self._curr.token_type in types: 7730 if advance: 7731 self._advance() 7732 return True 7733 7734 return None 7735 7736 def _match_pair(self, token_type_a, token_type_b, advance=True): 7737 if not self._curr or not self._next: 7738 return None 7739 7740 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7741 if advance: 7742 self._advance(2) 7743 return True 7744 7745 return None 7746 7747 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7748 if not self._match(TokenType.L_PAREN, expression=expression): 7749 self.raise_error("Expecting (") 7750 7751 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7752 if not self._match(TokenType.R_PAREN, expression=expression): 7753 self.raise_error("Expecting )") 7754 7755 def _match_texts(self, texts, advance=True): 7756 if ( 7757 self._curr 7758 and self._curr.token_type != TokenType.STRING 7759 and self._curr.text.upper() in texts 7760 ): 7761 if advance: 7762 self._advance() 7763 return True 7764 return None 7765 7766 def _match_text_seq(self, *texts, advance=True): 7767 index = self._index 7768 for text in texts: 7769 if ( 7770 self._curr 7771 and self._curr.token_type != TokenType.STRING 7772 and self._curr.text.upper() == text 7773 ): 7774 self._advance() 7775 else: 7776 self._retreat(index) 7777 return None 7778 7779 if not advance: 7780 self._retreat(index) 7781 7782 return True 7783 7784 def _replace_lambda( 7785 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7786 ) -> t.Optional[exp.Expression]: 7787 if not node: 7788 return node 7789 7790 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7791 7792 for column in node.find_all(exp.Column): 7793 typ = lambda_types.get(column.parts[0].name) 7794 if typ is not None: 7795 dot_or_id = column.to_dot() if column.table else column.this 7796 7797 if typ: 7798 dot_or_id = self.expression( 7799 exp.Cast, 7800 this=dot_or_id, 7801 to=typ, 7802 ) 7803 7804 parent = column.parent 7805 7806 while isinstance(parent, exp.Dot): 7807 if not isinstance(parent.parent, exp.Dot): 7808 parent.replace(dot_or_id) 7809 break 7810 parent = parent.parent 7811 else: 7812 if column is node: 7813 node = dot_or_id 7814 else: 7815 column.replace(dot_or_id) 7816 return node 7817 7818 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7819 start = self._prev 7820 7821 # Not to be confused with TRUNCATE(number, decimals) function call 7822 if self._match(TokenType.L_PAREN): 7823 self._retreat(self._index - 2) 7824 return self._parse_function() 7825 7826 # Clickhouse supports TRUNCATE DATABASE as well 7827 is_database = self._match(TokenType.DATABASE) 7828 7829 self._match(TokenType.TABLE) 7830 7831 exists = self._parse_exists(not_=False) 7832 7833 expressions = self._parse_csv( 7834 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7835 ) 7836 7837 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7838 7839 if self._match_text_seq("RESTART", "IDENTITY"): 7840 identity = "RESTART" 7841 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7842 identity = "CONTINUE" 7843 else: 7844 identity = None 7845 7846 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7847 option = self._prev.text 7848 else: 7849 option = None 7850 7851 partition = self._parse_partition() 7852 7853 # Fallback case 7854 if self._curr: 7855 return 
self._parse_as_command(start) 7856 7857 return self.expression( 7858 exp.TruncateTable, 7859 expressions=expressions, 7860 is_database=is_database, 7861 exists=exists, 7862 cluster=cluster, 7863 identity=identity, 7864 option=option, 7865 partition=partition, 7866 ) 7867 7868 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7869 this = self._parse_ordered(self._parse_opclass) 7870 7871 if not self._match(TokenType.WITH): 7872 return this 7873 7874 op = self._parse_var(any_token=True) 7875 7876 return self.expression(exp.WithOperator, this=this, op=op) 7877 7878 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7879 self._match(TokenType.EQ) 7880 self._match(TokenType.L_PAREN) 7881 7882 opts: t.List[t.Optional[exp.Expression]] = [] 7883 while self._curr and not self._match(TokenType.R_PAREN): 7884 if self._match_text_seq("FORMAT_NAME", "="): 7885 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7886 # so we parse it separately to use _parse_field() 7887 prop = self.expression( 7888 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7889 ) 7890 opts.append(prop) 7891 else: 7892 opts.append(self._parse_property()) 7893 7894 self._match(TokenType.COMMA) 7895 7896 return opts 7897 7898 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7899 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7900 7901 options = [] 7902 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7903 option = self._parse_var(any_token=True) 7904 prev = self._prev.text.upper() 7905 7906 # Different dialects might separate options and values by white space, "=" and "AS" 7907 self._match(TokenType.EQ) 7908 self._match(TokenType.ALIAS) 7909 7910 param = self.expression(exp.CopyParameter, this=option) 7911 7912 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7913 TokenType.L_PAREN, advance=False 7914 ): 7915 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7916 param.set("expressions", self._parse_wrapped_options()) 7917 elif prev == "FILE_FORMAT": 7918 # T-SQL's external file format case 7919 param.set("expression", self._parse_field()) 7920 else: 7921 param.set("expression", self._parse_unquoted_field()) 7922 7923 options.append(param) 7924 self._match(sep) 7925 7926 return options 7927 7928 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7929 expr = self.expression(exp.Credentials) 7930 7931 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7932 expr.set("storage", self._parse_field()) 7933 if self._match_text_seq("CREDENTIALS"): 7934 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7935 creds = ( 7936 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7937 ) 7938 expr.set("credentials", creds) 7939 if self._match_text_seq("ENCRYPTION"): 7940 expr.set("encryption", self._parse_wrapped_options()) 7941 if self._match_text_seq("IAM_ROLE"): 7942 expr.set("iam_role", self._parse_field()) 7943 if self._match_text_seq("REGION"): 7944 expr.set("region", self._parse_field()) 7945 7946 return expr 7947 7948 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7949 return self._parse_field() 7950 7951 def _parse_copy(self) -> exp.Copy | exp.Command: 7952 start = self._prev 7953 7954 self._match(TokenType.INTO) 7955 7956 this = ( 7957 self._parse_select(nested=True, parse_subquery_alias=False) 7958 if self._match(TokenType.L_PAREN, advance=False) 7959 else self._parse_table(schema=True) 7960 ) 7961 7962 
kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7963 7964 files = self._parse_csv(self._parse_file_location) 7965 credentials = self._parse_credentials() 7966 7967 self._match_text_seq("WITH") 7968 7969 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7970 7971 # Fallback case 7972 if self._curr: 7973 return self._parse_as_command(start) 7974 7975 return self.expression( 7976 exp.Copy, 7977 this=this, 7978 kind=kind, 7979 credentials=credentials, 7980 files=files, 7981 params=params, 7982 ) 7983 7984 def _parse_normalize(self) -> exp.Normalize: 7985 return self.expression( 7986 exp.Normalize, 7987 this=self._parse_bitwise(), 7988 form=self._match(TokenType.COMMA) and self._parse_var(), 7989 ) 7990 7991 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 7992 args = self._parse_csv(lambda: self._parse_lambda()) 7993 7994 this = seq_get(args, 0) 7995 decimals = seq_get(args, 1) 7996 7997 return expr_type( 7998 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 7999 ) 8000 8001 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8002 if self._match_text_seq("COLUMNS", "(", advance=False): 8003 this = self._parse_function() 8004 if isinstance(this, exp.Columns): 8005 this.set("unpack", True) 8006 return this 8007 8008 return self.expression( 8009 exp.Star, 8010 **{ # type: ignore 8011 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8012 "replace": self._parse_star_op("REPLACE"), 8013 "rename": self._parse_star_op("RENAME"), 8014 }, 8015 ) 8016 8017 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8018 privilege_parts = [] 8019 8020 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8021 # (end of privilege list) or L_PAREN (start of column list) are met 8022 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8023 privilege_parts.append(self._curr.text.upper()) 8024 self._advance() 8025 8026 this = exp.var(" ".join(privilege_parts)) 8027 expressions = ( 8028 self._parse_wrapped_csv(self._parse_column) 8029 if self._match(TokenType.L_PAREN, advance=False) 8030 else None 8031 ) 8032 8033 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8034 8035 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8036 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8037 principal = self._parse_id_var() 8038 8039 if not principal: 8040 return None 8041 8042 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8043 8044 def _parse_grant(self) -> exp.Grant | exp.Command: 8045 start = self._prev 8046 8047 privileges = self._parse_csv(self._parse_grant_privilege) 8048 8049 self._match(TokenType.ON) 8050 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8051 8052 # Attempt to parse the securable e.g. 
MySQL allows names 8053 # such as "foo.*", "*.*" which are not easily parseable yet 8054 securable = self._try_parse(self._parse_table_parts) 8055 8056 if not securable or not self._match_text_seq("TO"): 8057 return self._parse_as_command(start) 8058 8059 principals = self._parse_csv(self._parse_grant_principal) 8060 8061 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8062 8063 if self._curr: 8064 return self._parse_as_command(start) 8065 8066 return self.expression( 8067 exp.Grant, 8068 privileges=privileges, 8069 kind=kind, 8070 securable=securable, 8071 principals=principals, 8072 grant_option=grant_option, 8073 ) 8074 8075 def _parse_overlay(self) -> exp.Overlay: 8076 return self.expression( 8077 exp.Overlay, 8078 **{ # type: ignore 8079 "this": self._parse_bitwise(), 8080 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8081 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8082 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8083 }, 8084 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
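A minimal usage sketch (the SQL string and settings are illustrative): the parser is normally fed tokens produced by the Tokenizer, and the settings above control how parse failures surface.

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    # Collect errors and raise them together instead of failing on the first one.
    parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)

    sql = "SELECT a, b FROM t"
    expressions = parser.parse(Tokenizer().tokenize(sql), sql)
    print(expressions[0].sql())  # SELECT a, b FROM t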
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
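A sketch of the tokenize-then-parse flow through a dialect (the dialect name is illustrative); each top-level statement in the input becomes one tree in the returned list:

    from sqlglot.dialects import Dialect

    dialect = Dialect.get_or_raise("duckdb")
    sql = "SELECT 1; SELECT 2"
    trees = dialect.parser().parse(dialect.tokenize(sql), sql)
    assert len(trees) == 2  # one tree per statement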
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
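For example (a sketch, assuming exp.Select among the registered EXPRESSION_PARSERS keys), parse_into can be used to assert the shape of the result up front:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT * FROM t"
    parser = Parser()
    select = parser.parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]
    assert isinstance(select, exp.Select)

If none of the given types parse, the raised ParseError carries an "into_expression" entry per failed attempt, as shown in the source above.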
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
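A sketch of the two non-default behaviors (the malformed SQL is illustrative): with WARN each recorded error is logged and best-effort trees are returned, while RAISE bundles the recorded errors into a single ParseError:

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT * FROM (SELECT 1"  # missing closing parenthesis

    # WARN: the error is logged via the module logger, parsing still returns.
    Parser(error_level=ErrorLevel.WARN).parse(Tokenizer().tokenize(sql), sql)

    # RAISE: accumulated errors surface together once parsing finishes.
    try:
        Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql)
    except ParseError as e:
        print(e.errors[0]["description"])  # structured details: line, col, context, ...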
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
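When the error level is not IMMEDIATE, the errors recorded here stay available on the parser after the run; a small sketch of inspecting them (input is illustrative):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT * FROM (SELECT 1"  # missing closing parenthesis
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql)

    for error in parser.errors:  # each entry is a ParseError built by raise_error
        print(error.errors[0]["line"], error.errors[0]["col"], error.errors[0]["description"])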
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
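Dialect implementations call this constantly while building nodes; a standalone sketch (the node built here is illustrative):

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    node = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
    print(node.sql())  # a = 1

Unlike instantiating exp.EQ directly, going through expression() also runs validate_expression, so missing mandatory arguments are reported according to the parser's error level.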
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
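A sketch of the failure mode (exp.EQ requires both "this" and "expression"):

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser

    incomplete = exp.EQ(this=exp.column("a"))  # mandatory "expression" is missing

    try:
        Parser(error_level=ErrorLevel.IMMEDIATE).validate_expression(incomplete)
    except ParseError as e:
        print(e.errors[0]["description"])  # names the missing argument

    # With ErrorLevel.IGNORE the check is skipped and the node passes through unchanged.
    Parser(error_level=ErrorLevel.IGNORE).validate_expression(incomplete)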