sqlglot.parser
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import (
    ErrorLevel,
    ParseError,
    TokenError,
    concat_messages,
    highlight_sql,
    merge_errors,
)
from sqlglot.expressions import apply_index_offset
from sqlglot.helper import ensure_list, i64, seq_get
from sqlglot.trie import new_trie
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie
from collections.abc import Sequence
from builtins import type as Type

if t.TYPE_CHECKING:
    from sqlglot.expressions import ExpOrStr
    from sqlglot._typing import E, BuilderArgs
    from sqlglot.dialects.dialect import Dialect, DialectType

    from re import Pattern

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = dict[str, Sequence[t.Union[Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: BuilderArgs) -> exp.StarMap | exp.VarMap:
    """
    Builds a map expression from a flat, alternating key/value argument list.

    Args:
        args: Function arguments [key1, value1, key2, value2, ...], or a single star argument

    Returns:
        StarMap when the only argument is a star, otherwise a VarMap whose keys and
        values are wrapped in arrays
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Indexing args[pos + 1] directly means a key without a value raises IndexError
    pairs = [(args[pos], args[pos + 1]) for pos in range(0, len(args), 2)]

    keys: list[ExpOrStr] = [key for key, _ in pairs]
    values: list[ExpOrStr] = [value for _, value in pairs]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: BuilderArgs) -> exp.Escape | exp.Like:
    """
    Builds a Like expression from function-call arguments.

    The second argument becomes the matched value and the first one the pattern. When a
    third argument is present, the Like node is wrapped in an Escape that uses it.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))

    if len(args) > 2:
        return exp.Escape(this=like, expression=seq_get(args, 2))

    return like


def binary_range_parser(
    expr_type: Type[exp.Expr], reverse_args: bool = False
) -> t.Callable[[Parser, exp.Expr | None], exp.Expr | None]:
    """
    Creates a parser callback for a binary range operator.

    Args:
        expr_type: The expression class to instantiate with `this` and `expression`
        reverse_args: Whether the two operands should be swapped before building the node

    Returns:
        A callable that parses the right-hand operand with `_parse_bitwise`, builds the
        node and passes it through `_parse_escape`
    """

    def _parse_binary_range(self: Parser, this: exp.Expr | None) -> exp.Expr | None:
        parsed = self._parse_bitwise()
        left, right = (parsed, this) if reverse_args else (this, parsed)
        node = expr_type(this=left, expression=right)
        return self._parse_escape(self.expression(node))

    return _parse_binary_range


def build_logarithm(args: BuilderArgs, dialect: Dialect) -> exp.Func:
    """
    Builds a logarithm expression, honoring the dialect's argument order and default base.

    Args:
        args: Function arguments, either [value] or two arguments (base and value)
        dialect: The dialect to read LOG_BASE_FIRST and the parser's LOG_DEFAULTS_TO_LN from

    Returns:
        Log for the two-argument form; Ln or Log for the single-argument form
    """
    # Default argument order is base, expression
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if not second:
        log_class = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return log_class(this=first)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=first, expression=second)

    # The dialect lists the base last, so the operands are swapped
    return exp.Log(this=second, expression=first)


def build_hex(args: BuilderArgs, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    """Builds LowerHex when the dialect's HEX_LOWERCASE is truthy, otherwise Hex."""
    arg = seq_get(args, 0)

    if dialect.HEX_LOWERCASE:
        return exp.LowerHex(this=arg)

    return exp.Hex(this=arg)


def build_lower(args: BuilderArgs) -> exp.Lower | exp.Hex:
    """Builds Lower, collapsing LOWER(HEX(..)) into a single LowerHex node."""
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)

    if isinstance(arg, exp.Hex):
        return exp.LowerHex(this=arg.this)

    return exp.Lower(this=arg)


def build_upper(args: BuilderArgs) -> exp.Upper | exp.Hex:
    """Builds Upper, collapsing UPPER(HEX(..)) into a single Hex node."""
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)

    if isinstance(arg, exp.Hex):
        return exp.Hex(this=arg.this)

    return exp.Upper(this=arg)


def build_extract_json_with_path(
    expr_type: Type[E],
) -> t.Callable[[BuilderArgs, Dialect], E]:
    """
    Creates a builder for JSON extraction functions that take a path argument.

    Args:
        expr_type: The expression class to instantiate

    Returns:
        A builder that converts the second argument with `dialect.to_json_path`. For
        JSONExtract, arguments after the second are stored under "expressions"; for
        JSONExtractScalar, "scalar_only" is set from the dialect
    """

    def _builder(args: BuilderArgs, dialect: Dialect) -> E:
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=seq_get(args, 0), expression=json_path)

        # The two classes are distinct, so at most one of these branches applies
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])
        elif expr_type is exp.JSONExtractScalar:
            node.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY)

        return node

    return _builder
122def build_mod(args: BuilderArgs) -> exp.Mod: 123 this = seq_get(args, 0) 124 expression = seq_get(args, 1) 125 126 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 127 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 128 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 129 130 return exp.Mod(this=this, expression=expression) 131 132 133def build_pad(args: BuilderArgs, is_left: bool = True): 134 return exp.Pad( 135 this=seq_get(args, 0), 136 expression=seq_get(args, 1), 137 fill_pattern=seq_get(args, 2), 138 is_left=is_left, 139 ) 140 141 142def build_array_constructor( 143 exp_class: Type[E], args: list[t.Any], bracket_kind: TokenType, dialect: Dialect 144) -> exp.Expr: 145 array_exp = exp_class(expressions=args) 146 147 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 148 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 149 150 return array_exp 151 152 153def build_convert_timezone( 154 args: BuilderArgs, default_source_tz: str | None = None 155) -> exp.ConvertTimezone | exp.Anonymous: 156 if len(args) == 2: 157 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 158 return exp.ConvertTimezone( 159 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 160 ) 161 162 return exp.ConvertTimezone.from_arg_list(args) 163 164 165def build_trim(args: BuilderArgs, is_left: bool = True, reverse_args: bool = False) -> exp.Trim: 166 this, expression = seq_get(args, 0), seq_get(args, 1) 167 168 if expression and reverse_args: 169 this, expression = expression, this 170 171 return exp.Trim(this=this, expression=expression, position="LEADING" if is_left else "TRAILING") 172 173 174def build_coalesce( 175 args: BuilderArgs, is_nvl: bool | None = None, is_null: bool | None = None 176) -> exp.Coalesce: 177 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, 
is_null=is_null) 178 179 180def build_locate_strposition(args: BuilderArgs) -> exp.StrPosition: 181 return exp.StrPosition( 182 this=seq_get(args, 1), 183 substr=seq_get(args, 0), 184 position=seq_get(args, 2), 185 ) 186 187 188def build_array_append(args: BuilderArgs, dialect: Dialect) -> exp.ArrayAppend: 189 """ 190 Builds ArrayAppend with NULL propagation semantics based on the dialect configuration. 191 192 Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. 193 Others (DuckDB, PostgreSQL) create a new single-element array instead. 194 195 Args: 196 args: Function arguments [array, element] 197 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 198 199 Returns: 200 ArrayAppend expression with appropriate null_propagation flag 201 """ 202 return exp.ArrayAppend( 203 this=seq_get(args, 0), 204 expression=seq_get(args, 1), 205 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 206 ) 207 208 209def build_array_prepend(args: BuilderArgs, dialect: Dialect) -> exp.ArrayPrepend: 210 """ 211 Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration. 212 213 Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. 214 Others (DuckDB, PostgreSQL) create a new single-element array instead. 215 216 Args: 217 args: Function arguments [array, element] 218 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 219 220 Returns: 221 ArrayPrepend expression with appropriate null_propagation flag 222 """ 223 return exp.ArrayPrepend( 224 this=seq_get(args, 0), 225 expression=seq_get(args, 1), 226 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 227 ) 228 229 230def build_array_concat(args: BuilderArgs, dialect: Dialect) -> exp.ArrayConcat: 231 """ 232 Builds ArrayConcat with NULL propagation semantics based on the dialect configuration. 233 234 Some dialects (Redshift, Snowflake) return NULL when any input array is NULL. 
235 Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation. 236 237 Args: 238 args: Function arguments [array1, array2, ...] (variadic) 239 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 240 241 Returns: 242 ArrayConcat expression with appropriate null_propagation flag 243 """ 244 return exp.ArrayConcat( 245 this=seq_get(args, 0), 246 expressions=args[1:], 247 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 248 ) 249 250 251def build_array_remove(args: BuilderArgs, dialect: Dialect) -> exp.ArrayRemove: 252 """ 253 Builds ArrayRemove with NULL propagation semantics based on the dialect configuration. 254 255 Some dialects (Snowflake) return NULL when the removal value is NULL. 256 Others (DuckDB) may return empty array due to NULL comparison semantics. 257 258 Args: 259 args: Function arguments [array, value_to_remove] 260 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 261 262 Returns: 263 ArrayRemove expression with appropriate null_propagation flag 264 """ 265 return exp.ArrayRemove( 266 this=seq_get(args, 0), 267 expression=seq_get(args, 1), 268 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 269 ) 270 271 272def _resolve_dialect(dialect: DialectType) -> Dialect: 273 from sqlglot.dialects.dialect import Dialect 274 275 return Dialect.get_or_raise(dialect) 276 277 278def _unpivot_target(expr: exp.Expr) -> exp.Expr: 279 # UNPIVOT's pre-FOR values and FOR field are new output names, not column references. 280 if isinstance(expr, exp.Column) and not expr.table: 281 return expr.this 282 if isinstance(expr, exp.Tuple): 283 expr.set("expressions", [_unpivot_target(e) for e in expr.expressions]) 284 return expr 285 286 287SENTINEL_NONE: Token = Token(TokenType.SENTINEL, "SENTINEL") 288 289 290class Parser: 291 """ 292 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 293 294 Args: 295 error_level: The desired error level. 
296 Default: ErrorLevel.IMMEDIATE 297 error_message_context: The amount of context to capture from a query string when displaying 298 the error message (in number of characters). 299 Default: 100 300 max_errors: Maximum number of error messages to include in a raised ParseError. 301 This is only relevant if error_level is ErrorLevel.RAISE. 302 Default: 3 303 max_nodes: Maximum number of AST nodes to prevent memory exhaustion. 304 Set to -1 (default) to disable the check. 305 """ 306 307 __slots__ = ( 308 "error_level", 309 "error_message_context", 310 "max_errors", 311 "max_nodes", 312 "dialect", 313 "sql", 314 "errors", 315 "_tokens", 316 "_index", 317 "_curr", 318 "_next", 319 "_prev", 320 "_prev_comments", 321 "_pipe_cte_counter", 322 "_chunks", 323 "_chunk_index", 324 "_tokens_size", 325 "_node_count", 326 ) 327 328 FUNCTIONS: t.ClassVar[dict[str, t.Callable]] = { 329 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 330 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 331 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 332 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 333 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 334 ), 335 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 336 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 337 ), 338 "ARRAY_APPEND": build_array_append, 339 "ARRAY_CAT": build_array_concat, 340 "ARRAY_CONCAT": build_array_concat, 341 "ARRAY_INTERSECT": lambda args: exp.ArrayIntersect(expressions=args), 342 "ARRAY_INTERSECTION": lambda args: exp.ArrayIntersect(expressions=args), 343 "ARRAY_PREPEND": build_array_prepend, 344 "ARRAY_REMOVE": build_array_remove, 345 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 346 "CONCAT": lambda args, dialect: exp.Concat( 347 expressions=args, 348 safe=not dialect.STRICT_STRING_CONCAT, 349 coalesce=dialect.CONCAT_COALESCE, 350 ), 
351 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 352 expressions=args, 353 safe=not dialect.STRICT_STRING_CONCAT, 354 coalesce=dialect.CONCAT_WS_COALESCE, 355 ), 356 "CONVERT_TIMEZONE": build_convert_timezone, 357 "DATE_TO_DATE_STR": lambda args: exp.Cast( 358 this=seq_get(args, 0), 359 to=exp.DataType(this=exp.DType.TEXT), 360 ), 361 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 362 start=seq_get(args, 0), 363 end=seq_get(args, 1), 364 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 365 ), 366 "GENERATE_UUID": lambda args, dialect: exp.Uuid( 367 is_string=dialect.UUID_IS_STRING_TYPE or None 368 ), 369 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 370 "GREATEST": lambda args, dialect: exp.Greatest( 371 this=seq_get(args, 0), 372 expressions=args[1:], 373 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 374 ), 375 "LEAST": lambda args, dialect: exp.Least( 376 this=seq_get(args, 0), 377 expressions=args[1:], 378 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 379 ), 380 "HEX": build_hex, 381 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 382 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 383 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 384 "JSON_KEYS": lambda args, dialect: exp.JSONKeys( 385 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 386 ), 387 "LIKE": build_like, 388 "LOG": build_logarithm, 389 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 390 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 391 "LOWER": build_lower, 392 "LPAD": lambda args: build_pad(args), 393 "LEFTPAD": lambda args: build_pad(args), 394 "LTRIM": lambda args: build_trim(args), 395 "MOD": build_mod, 396 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 397 "RPAD": lambda args: build_pad(args, is_left=False), 
398 "RTRIM": lambda args: build_trim(args, is_left=False), 399 "SCOPE_RESOLUTION": lambda args: ( 400 exp.ScopeResolution(expression=seq_get(args, 0)) 401 if len(args) != 2 402 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)) 403 ), 404 "STRPOS": exp.StrPosition.from_arg_list, 405 "CHARINDEX": lambda args: build_locate_strposition(args), 406 "INSTR": exp.StrPosition.from_arg_list, 407 "LOCATE": lambda args: build_locate_strposition(args), 408 "TIME_TO_TIME_STR": lambda args: exp.Cast( 409 this=seq_get(args, 0), 410 to=exp.DataType(this=exp.DType.TEXT), 411 ), 412 "TO_HEX": build_hex, 413 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 414 this=exp.Cast( 415 this=seq_get(args, 0), 416 to=exp.DataType(this=exp.DType.TEXT), 417 ), 418 start=exp.Literal.number(1), 419 length=exp.Literal.number(10), 420 ), 421 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 422 "UPPER": build_upper, 423 "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None), 424 "UUID_STRING": lambda args, dialect: exp.Uuid( 425 this=seq_get(args, 0), 426 name=seq_get(args, 1), 427 is_string=dialect.UUID_IS_STRING_TYPE or None, 428 ), 429 "VAR_MAP": build_var_map, 430 } 431 432 NO_PAREN_FUNCTIONS: t.ClassVar[dict] = { 433 TokenType.CURRENT_DATE: exp.CurrentDate, 434 TokenType.CURRENT_DATETIME: exp.CurrentDate, 435 TokenType.CURRENT_TIME: exp.CurrentTime, 436 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 437 TokenType.CURRENT_USER: exp.CurrentUser, 438 TokenType.CURRENT_ROLE: exp.CurrentRole, 439 } 440 441 STRUCT_TYPE_TOKENS: t.ClassVar = { 442 TokenType.NESTED, 443 TokenType.OBJECT, 444 TokenType.STRUCT, 445 TokenType.UNION, 446 } 447 448 NESTED_TYPE_TOKENS: t.ClassVar = { 449 TokenType.ARRAY, 450 TokenType.LIST, 451 TokenType.LOWCARDINALITY, 452 TokenType.MAP, 453 TokenType.NULLABLE, 454 TokenType.RANGE, 455 *STRUCT_TYPE_TOKENS, 456 } 457 458 ENUM_TYPE_TOKENS: t.ClassVar = { 459 TokenType.DYNAMIC, 460 
TokenType.ENUM, 461 TokenType.ENUM8, 462 TokenType.ENUM16, 463 } 464 465 AGGREGATE_TYPE_TOKENS: t.ClassVar = { 466 TokenType.AGGREGATEFUNCTION, 467 TokenType.SIMPLEAGGREGATEFUNCTION, 468 } 469 470 TYPE_TOKENS: t.ClassVar = { 471 TokenType.BIT, 472 TokenType.BOOLEAN, 473 TokenType.TINYINT, 474 TokenType.UTINYINT, 475 TokenType.SMALLINT, 476 TokenType.USMALLINT, 477 TokenType.INT, 478 TokenType.UINT, 479 TokenType.BIGINT, 480 TokenType.UBIGINT, 481 TokenType.BIGNUM, 482 TokenType.INT128, 483 TokenType.UINT128, 484 TokenType.INT256, 485 TokenType.UINT256, 486 TokenType.MEDIUMINT, 487 TokenType.UMEDIUMINT, 488 TokenType.FIXEDSTRING, 489 TokenType.FLOAT, 490 TokenType.DOUBLE, 491 TokenType.UDOUBLE, 492 TokenType.CHAR, 493 TokenType.NCHAR, 494 TokenType.VARCHAR, 495 TokenType.NVARCHAR, 496 TokenType.BPCHAR, 497 TokenType.TEXT, 498 TokenType.MEDIUMTEXT, 499 TokenType.LONGTEXT, 500 TokenType.BLOB, 501 TokenType.MEDIUMBLOB, 502 TokenType.LONGBLOB, 503 TokenType.BINARY, 504 TokenType.VARBINARY, 505 TokenType.JSON, 506 TokenType.JSONB, 507 TokenType.INTERVAL, 508 TokenType.TINYBLOB, 509 TokenType.TINYTEXT, 510 TokenType.TIME, 511 TokenType.TIMETZ, 512 TokenType.TIME_NS, 513 TokenType.TIMESTAMP, 514 TokenType.TIMESTAMP_S, 515 TokenType.TIMESTAMP_MS, 516 TokenType.TIMESTAMP_NS, 517 TokenType.TIMESTAMPTZ, 518 TokenType.TIMESTAMPLTZ, 519 TokenType.TIMESTAMPNTZ, 520 TokenType.DATETIME, 521 TokenType.DATETIME2, 522 TokenType.DATETIME64, 523 TokenType.SMALLDATETIME, 524 TokenType.DATE, 525 TokenType.DATE32, 526 TokenType.INT4RANGE, 527 TokenType.INT4MULTIRANGE, 528 TokenType.INT8RANGE, 529 TokenType.INT8MULTIRANGE, 530 TokenType.NUMRANGE, 531 TokenType.NUMMULTIRANGE, 532 TokenType.TSRANGE, 533 TokenType.TSMULTIRANGE, 534 TokenType.TSTZRANGE, 535 TokenType.TSTZMULTIRANGE, 536 TokenType.DATERANGE, 537 TokenType.DATEMULTIRANGE, 538 TokenType.DECIMAL, 539 TokenType.DECIMAL32, 540 TokenType.DECIMAL64, 541 TokenType.DECIMAL128, 542 TokenType.DECIMAL256, 543 TokenType.DECFLOAT, 544 
TokenType.UDECIMAL, 545 TokenType.BIGDECIMAL, 546 TokenType.UUID, 547 TokenType.GEOGRAPHY, 548 TokenType.GEOGRAPHYPOINT, 549 TokenType.GEOMETRY, 550 TokenType.POINT, 551 TokenType.RING, 552 TokenType.LINESTRING, 553 TokenType.MULTILINESTRING, 554 TokenType.POLYGON, 555 TokenType.MULTIPOLYGON, 556 TokenType.HLLSKETCH, 557 TokenType.HSTORE, 558 TokenType.PSEUDO_TYPE, 559 TokenType.SUPER, 560 TokenType.SERIAL, 561 TokenType.SMALLSERIAL, 562 TokenType.BIGSERIAL, 563 TokenType.XML, 564 TokenType.YEAR, 565 TokenType.USERDEFINED, 566 TokenType.MONEY, 567 TokenType.SMALLMONEY, 568 TokenType.ROWVERSION, 569 TokenType.IMAGE, 570 TokenType.VARIANT, 571 TokenType.VECTOR, 572 TokenType.VOID, 573 TokenType.OBJECT, 574 TokenType.OBJECT_IDENTIFIER, 575 TokenType.INET, 576 TokenType.IPADDRESS, 577 TokenType.IPPREFIX, 578 TokenType.IPV4, 579 TokenType.IPV6, 580 TokenType.UNKNOWN, 581 TokenType.NOTHING, 582 TokenType.NULL, 583 TokenType.NAME, 584 TokenType.TDIGEST, 585 TokenType.DYNAMIC, 586 *ENUM_TYPE_TOKENS, 587 *NESTED_TYPE_TOKENS, 588 *AGGREGATE_TYPE_TOKENS, 589 } 590 591 SIGNED_TO_UNSIGNED_TYPE_TOKEN: t.ClassVar = { 592 TokenType.BIGINT: TokenType.UBIGINT, 593 TokenType.INT: TokenType.UINT, 594 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 595 TokenType.SMALLINT: TokenType.USMALLINT, 596 TokenType.TINYINT: TokenType.UTINYINT, 597 TokenType.DECIMAL: TokenType.UDECIMAL, 598 TokenType.DOUBLE: TokenType.UDOUBLE, 599 } 600 601 SUBQUERY_PREDICATES: t.ClassVar = { 602 TokenType.ANY: exp.Any, 603 TokenType.ALL: exp.All, 604 TokenType.EXISTS: exp.Exists, 605 TokenType.SOME: exp.Any, 606 } 607 608 SUBQUERY_TOKENS: t.ClassVar = { 609 TokenType.SELECT, 610 TokenType.WITH, 611 TokenType.FROM, 612 } 613 614 RESERVED_TOKENS: t.ClassVar = { 615 *Tokenizer.SINGLE_TOKENS.values(), 616 TokenType.SELECT, 617 } - {TokenType.IDENTIFIER} 618 619 DB_CREATABLES: t.ClassVar = { 620 TokenType.DATABASE, 621 TokenType.DICTIONARY, 622 TokenType.FILE_FORMAT, 623 TokenType.MODEL, 624 TokenType.NAMESPACE, 625 
TokenType.SCHEMA, 626 TokenType.SEMANTIC_VIEW, 627 TokenType.SEQUENCE, 628 TokenType.SINK, 629 TokenType.SOURCE, 630 TokenType.STAGE, 631 TokenType.STORAGE_INTEGRATION, 632 TokenType.STREAMLIT, 633 TokenType.TABLE, 634 TokenType.TAG, 635 TokenType.VIEW, 636 TokenType.WAREHOUSE, 637 } 638 639 CREATABLES: t.ClassVar = { 640 TokenType.COLUMN, 641 TokenType.CONSTRAINT, 642 TokenType.FOREIGN_KEY, 643 TokenType.FUNCTION, 644 TokenType.INDEX, 645 TokenType.PROCEDURE, 646 TokenType.TRIGGER, 647 TokenType.TYPE, 648 *DB_CREATABLES, 649 } 650 651 TRIGGER_EVENTS: t.ClassVar = { 652 TokenType.INSERT, 653 TokenType.UPDATE, 654 TokenType.DELETE, 655 TokenType.TRUNCATE, 656 } 657 658 ALTERABLES: t.ClassVar = { 659 TokenType.INDEX, 660 TokenType.TABLE, 661 TokenType.VIEW, 662 TokenType.SESSION, 663 } 664 665 # Tokens that can represent identifiers 666 ID_VAR_TOKENS: t.ClassVar[set] = { 667 TokenType.ALL, 668 TokenType.ANALYZE, 669 TokenType.ATTACH, 670 TokenType.VAR, 671 TokenType.ANTI, 672 TokenType.APPLY, 673 TokenType.ASC, 674 TokenType.ASOF, 675 TokenType.AUTO_INCREMENT, 676 TokenType.BEGIN, 677 TokenType.BPCHAR, 678 TokenType.CACHE, 679 TokenType.CASE, 680 TokenType.COLLATE, 681 TokenType.COMMAND, 682 TokenType.COMMENT, 683 TokenType.COMMIT, 684 TokenType.CONSTRAINT, 685 TokenType.COPY, 686 TokenType.CUBE, 687 TokenType.CURRENT_SCHEMA, 688 TokenType.DEFAULT, 689 TokenType.DELETE, 690 TokenType.DESC, 691 TokenType.DESCRIBE, 692 TokenType.DETACH, 693 TokenType.DICTIONARY, 694 TokenType.DIV, 695 TokenType.END, 696 TokenType.EXECUTE, 697 TokenType.EXPORT, 698 TokenType.ESCAPE, 699 TokenType.FALSE, 700 TokenType.FIRST, 701 TokenType.FILE, 702 TokenType.FILTER, 703 TokenType.FINAL, 704 TokenType.FORMAT, 705 TokenType.FULL, 706 TokenType.GET, 707 TokenType.IDENTIFIER, 708 TokenType.INOUT, 709 TokenType.IS, 710 TokenType.ISNULL, 711 TokenType.INTERVAL, 712 TokenType.KEEP, 713 TokenType.KILL, 714 TokenType.LEFT, 715 TokenType.LIMIT, 716 TokenType.LOAD, 717 TokenType.LOCK, 718 
TokenType.MATCH, 719 TokenType.MERGE, 720 TokenType.NATURAL, 721 TokenType.NEXT, 722 TokenType.OFFSET, 723 TokenType.OPERATOR, 724 TokenType.ORDINALITY, 725 TokenType.OVER, 726 TokenType.OVERLAPS, 727 TokenType.OVERWRITE, 728 TokenType.PARTITION, 729 TokenType.PERCENT, 730 TokenType.PIVOT, 731 TokenType.PRAGMA, 732 TokenType.PUT, 733 TokenType.RANGE, 734 TokenType.RECURSIVE, 735 TokenType.REFERENCES, 736 TokenType.REFRESH, 737 TokenType.RENAME, 738 TokenType.REPLACE, 739 TokenType.RIGHT, 740 TokenType.ROLLUP, 741 TokenType.ROW, 742 TokenType.ROWS, 743 TokenType.SEMI, 744 TokenType.SET, 745 TokenType.SETTINGS, 746 TokenType.SHOW, 747 TokenType.STREAM, 748 TokenType.STREAMLIT, 749 TokenType.TEMPORARY, 750 TokenType.TOP, 751 TokenType.TRUE, 752 TokenType.TRUNCATE, 753 TokenType.UNIQUE, 754 TokenType.UNNEST, 755 TokenType.UNPIVOT, 756 TokenType.UPDATE, 757 TokenType.USE, 758 TokenType.VOLATILE, 759 TokenType.WINDOW, 760 TokenType.CURRENT_CATALOG, 761 TokenType.LOCALTIME, 762 TokenType.LOCALTIMESTAMP, 763 TokenType.SESSION_USER, 764 TokenType.STRAIGHT_JOIN, 765 *ALTERABLES, 766 *CREATABLES, 767 *SUBQUERY_PREDICATES, 768 *TYPE_TOKENS, 769 *NO_PAREN_FUNCTIONS, 770 } - {TokenType.UNION} 771 772 TABLE_ALIAS_TOKENS: t.ClassVar[set] = ID_VAR_TOKENS - { 773 TokenType.ANTI, 774 TokenType.ASOF, 775 TokenType.FULL, 776 TokenType.LEFT, 777 TokenType.LOCK, 778 TokenType.NATURAL, 779 TokenType.RIGHT, 780 TokenType.SEMI, 781 TokenType.WINDOW, 782 } 783 784 ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS 785 786 COLON_PLACEHOLDER_TOKENS: t.ClassVar = ID_VAR_TOKENS 787 788 ARRAY_CONSTRUCTORS: t.ClassVar = { 789 "ARRAY": exp.Array, 790 "LIST": exp.List, 791 } 792 793 COMMENT_TABLE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.IS} 794 795 UPDATE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.SET} 796 797 TRIM_TYPES: t.ClassVar = {"LEADING", "TRAILING", "BOTH"} 798 799 # Tokens that indicate a simple column reference 800 IDENTIFIER_TOKENS: t.ClassVar[frozenset] = 
frozenset({TokenType.VAR, TokenType.IDENTIFIER}) 801 802 BRACKETS: t.ClassVar[frozenset] = frozenset({TokenType.L_BRACKET, TokenType.L_BRACE}) 803 804 # Postfix tokens that prevent the bare column fast path 805 COLUMN_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 806 { 807 TokenType.L_PAREN, 808 TokenType.L_BRACKET, 809 TokenType.L_BRACE, 810 TokenType.COLON, 811 TokenType.JOIN_MARKER, 812 } 813 ) 814 815 TABLE_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 816 { 817 TokenType.L_PAREN, 818 TokenType.L_BRACKET, 819 TokenType.L_BRACE, 820 TokenType.PIVOT, 821 TokenType.UNPIVOT, 822 TokenType.TABLE_SAMPLE, 823 } 824 ) 825 826 FUNC_TOKENS: t.ClassVar = { 827 TokenType.COLLATE, 828 TokenType.COMMAND, 829 TokenType.CURRENT_DATE, 830 TokenType.CURRENT_DATETIME, 831 TokenType.CURRENT_SCHEMA, 832 TokenType.CURRENT_TIMESTAMP, 833 TokenType.CURRENT_TIME, 834 TokenType.CURRENT_USER, 835 TokenType.CURRENT_CATALOG, 836 TokenType.FILTER, 837 TokenType.FIRST, 838 TokenType.FORMAT, 839 TokenType.GET, 840 TokenType.GLOB, 841 TokenType.IDENTIFIER, 842 TokenType.INDEX, 843 TokenType.ISNULL, 844 TokenType.ILIKE, 845 TokenType.INSERT, 846 TokenType.LIKE, 847 TokenType.LOCALTIME, 848 TokenType.LOCALTIMESTAMP, 849 TokenType.MERGE, 850 TokenType.NEXT, 851 TokenType.OFFSET, 852 TokenType.PRIMARY_KEY, 853 TokenType.RANGE, 854 TokenType.REPLACE, 855 TokenType.RLIKE, 856 TokenType.ROW, 857 TokenType.SESSION_USER, 858 TokenType.UNNEST, 859 TokenType.VAR, 860 TokenType.LEFT, 861 TokenType.RIGHT, 862 TokenType.SEQUENCE, 863 TokenType.DATE, 864 TokenType.DATETIME, 865 TokenType.TABLE, 866 TokenType.TIMESTAMP, 867 TokenType.TIMESTAMPTZ, 868 TokenType.TRUNCATE, 869 TokenType.UTC_DATE, 870 TokenType.UTC_TIME, 871 TokenType.UTC_TIMESTAMP, 872 TokenType.WINDOW, 873 TokenType.XOR, 874 *TYPE_TOKENS, 875 *SUBQUERY_PREDICATES, 876 } 877 878 CONJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 879 TokenType.AND: exp.And, 880 } 881 882 ASSIGNMENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] 
= { 883 TokenType.COLON_EQ: exp.PropertyEQ, 884 } 885 886 DISJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 887 TokenType.OR: exp.Or, 888 } 889 890 EQUALITY: t.ClassVar = { 891 TokenType.EQ: exp.EQ, 892 TokenType.NEQ: exp.NEQ, 893 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 894 } 895 896 COMPARISON: t.ClassVar = { 897 TokenType.GT: exp.GT, 898 TokenType.GTE: exp.GTE, 899 TokenType.LT: exp.LT, 900 TokenType.LTE: exp.LTE, 901 } 902 903 BITWISE: t.ClassVar = { 904 TokenType.AMP: exp.BitwiseAnd, 905 TokenType.CARET: exp.BitwiseXor, 906 TokenType.PIPE: exp.BitwiseOr, 907 } 908 909 TERM: t.ClassVar = { 910 TokenType.DASH: exp.Sub, 911 TokenType.PLUS: exp.Add, 912 TokenType.MOD: exp.Mod, 913 TokenType.COLLATE: exp.Collate, 914 } 915 916 FACTOR: t.ClassVar = { 917 TokenType.DIV: exp.IntDiv, 918 TokenType.LR_ARROW: exp.Distance, 919 TokenType.SLASH: exp.Div, 920 TokenType.STAR: exp.Mul, 921 } 922 923 EXPONENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = {} 924 925 TIMES: t.ClassVar = { 926 TokenType.TIME, 927 TokenType.TIMETZ, 928 } 929 930 TIMESTAMPS: t.ClassVar = { 931 TokenType.TIMESTAMP, 932 TokenType.TIMESTAMPNTZ, 933 TokenType.TIMESTAMPTZ, 934 TokenType.TIMESTAMPLTZ, 935 *TIMES, 936 } 937 938 SET_OPERATIONS: t.ClassVar = { 939 TokenType.UNION, 940 TokenType.INTERSECT, 941 TokenType.EXCEPT, 942 } 943 944 JOIN_METHODS: t.ClassVar = { 945 TokenType.ASOF, 946 TokenType.NATURAL, 947 TokenType.POSITIONAL, 948 } 949 950 JOIN_SIDES: t.ClassVar = { 951 TokenType.LEFT, 952 TokenType.RIGHT, 953 TokenType.FULL, 954 } 955 956 JOIN_KINDS: t.ClassVar = { 957 TokenType.ANTI, 958 TokenType.CROSS, 959 TokenType.INNER, 960 TokenType.OUTER, 961 TokenType.SEMI, 962 TokenType.STRAIGHT_JOIN, 963 } 964 965 JOIN_HINTS: t.ClassVar[set[str]] = set() 966 967 # Tokens that unambiguously end a table reference on the fast path 968 TABLE_TERMINATORS: t.ClassVar[frozenset] = frozenset( 969 { 970 TokenType.COMMA, 971 TokenType.GROUP_BY, 972 TokenType.HAVING, 973 TokenType.JOIN, 974 
TokenType.LIMIT, 975 TokenType.ON, 976 TokenType.ORDER_BY, 977 TokenType.R_PAREN, 978 TokenType.SEMICOLON, 979 TokenType.SENTINEL, 980 TokenType.WHERE, 981 *SET_OPERATIONS, 982 *JOIN_KINDS, 983 *JOIN_METHODS, 984 *JOIN_SIDES, 985 } 986 ) 987 988 LAMBDAS: t.ClassVar = { 989 TokenType.ARROW: lambda self, expressions: self.expression( 990 exp.Lambda( 991 this=self._replace_lambda( 992 self._parse_disjunction(), 993 expressions, 994 ), 995 expressions=expressions, 996 ) 997 ), 998 TokenType.FARROW: lambda self, expressions: self.expression( 999 exp.Kwarg(this=exp.var(expressions[0].name), expression=self._parse_disjunction()) 1000 ), 1001 } 1002 1003 # Whether lambda args include type annotations, e.g. TRANSFORM(arr, x INT -> x + 1) in Snowflake 1004 TYPED_LAMBDA_ARGS: t.ClassVar[bool] = False 1005 1006 LAMBDA_ARG_TERMINATORS: t.ClassVar[frozenset] = frozenset({TokenType.COMMA, TokenType.R_PAREN}) 1007 1008 COLUMN_OPERATORS: t.ClassVar = { 1009 TokenType.DOT: None, 1010 TokenType.DOTCOLON: lambda self, this, to: self.expression(exp.JSONCast(this=this, to=to)), 1011 TokenType.DCOLON: lambda self, this, to: self.build_cast( 1012 strict=self.STRICT_CAST, this=this, to=to 1013 ), 1014 TokenType.ARROW: lambda self, this, path: self.expression( 1015 exp.JSONExtract( 1016 this=this, 1017 expression=self.dialect.to_json_path(path), 1018 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1019 ) 1020 ), 1021 TokenType.DARROW: lambda self, this, path: self.expression( 1022 exp.JSONExtractScalar( 1023 this=this, 1024 expression=self.dialect.to_json_path(path), 1025 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1026 scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY, 1027 ) 1028 ), 1029 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 1030 exp.JSONBExtract(this=this, expression=path) 1031 ), 1032 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 1033 exp.JSONBExtractScalar(this=this, expression=path) 1034 ), 1035 TokenType.PLACEHOLDER: 
lambda self, this, key: self.expression( 1036 exp.JSONBContains(this=this, expression=key) 1037 ), 1038 } 1039 1040 CAST_COLUMN_OPERATORS: t.ClassVar = { 1041 TokenType.DOTCOLON, 1042 TokenType.DCOLON, 1043 } 1044 1045 EXPRESSION_PARSERS: t.ClassVar = { 1046 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1047 exp.Column: lambda self: self._parse_column(), 1048 exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()), 1049 exp.Condition: lambda self: self._parse_disjunction(), 1050 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 1051 exp.Expr: lambda self: self._parse_expression(), 1052 exp.From: lambda self: self._parse_from(joins=True), 1053 exp.GrantPrincipal: lambda self: self._parse_grant_principal(), 1054 exp.GrantPrivilege: lambda self: self._parse_grant_privilege(), 1055 exp.Group: lambda self: self._parse_group(), 1056 exp.Having: lambda self: self._parse_having(), 1057 exp.Hint: lambda self: self._parse_hint_body(), 1058 exp.Identifier: lambda self: self._parse_id_var(), 1059 exp.Join: lambda self: self._parse_join(), 1060 exp.Lambda: lambda self: self._parse_lambda(), 1061 exp.Lateral: lambda self: self._parse_lateral(), 1062 exp.Limit: lambda self: self._parse_limit(), 1063 exp.Offset: lambda self: self._parse_offset(), 1064 exp.Order: lambda self: self._parse_order(), 1065 exp.Ordered: lambda self: self._parse_ordered(), 1066 exp.Properties: lambda self: self._parse_properties(), 1067 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 1068 exp.Qualify: lambda self: self._parse_qualify(), 1069 exp.Returning: lambda self: self._parse_returning(), 1070 exp.Select: lambda self: self._parse_select(), 1071 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 1072 exp.Table: lambda self: self._parse_table_parts(), 1073 exp.TableAlias: lambda self: self._parse_table_alias(), 1074 exp.Tuple: lambda self: self._parse_value(values=False), 1075 
exp.Whens: lambda self: self._parse_when_matched(), 1076 exp.Where: lambda self: self._parse_where(), 1077 exp.Window: lambda self: self._parse_named_window(), 1078 exp.With: lambda self: self._parse_with(), 1079 } 1080 1081 STATEMENT_PARSERS: t.ClassVar = { 1082 TokenType.ALTER: lambda self: self._parse_alter(), 1083 TokenType.ANALYZE: lambda self: self._parse_analyze(), 1084 TokenType.BEGIN: lambda self: self._parse_transaction(), 1085 TokenType.CACHE: lambda self: self._parse_cache(), 1086 TokenType.COMMENT: lambda self: self._parse_comment(), 1087 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 1088 TokenType.COPY: lambda self: self._parse_copy(), 1089 TokenType.CREATE: lambda self: self._parse_create(), 1090 TokenType.DELETE: lambda self: self._parse_delete(), 1091 TokenType.DESC: lambda self: self._parse_describe(), 1092 TokenType.DESCRIBE: lambda self: self._parse_describe(), 1093 TokenType.DROP: lambda self: self._parse_drop(), 1094 TokenType.GRANT: lambda self: self._parse_grant(), 1095 TokenType.REVOKE: lambda self: self._parse_revoke(), 1096 TokenType.INSERT: lambda self: self._parse_insert(), 1097 TokenType.KILL: lambda self: self._parse_kill(), 1098 TokenType.LOAD: lambda self: self._parse_load(), 1099 TokenType.MERGE: lambda self: self._parse_merge(), 1100 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 1101 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma(this=self._parse_expression())), 1102 TokenType.REFRESH: lambda self: self._parse_refresh(), 1103 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 1104 TokenType.SET: lambda self: self._parse_set(), 1105 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 1106 TokenType.UNCACHE: lambda self: self._parse_uncache(), 1107 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 1108 TokenType.UPDATE: lambda self: self._parse_update(), 1109 TokenType.USE: lambda self: self._parse_use(), 1110 TokenType.SEMICOLON: 
lambda self: exp.Semicolon(), 1111 } 1112 1113 UNARY_PARSERS: t.ClassVar = { 1114 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 1115 TokenType.NOT: lambda self: self.expression(exp.Not(this=self._parse_equality())), 1116 TokenType.TILDE: lambda self: self.expression(exp.BitwiseNot(this=self._parse_unary())), 1117 TokenType.DASH: lambda self: self.expression(exp.Neg(this=self._parse_unary())), 1118 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt(this=self._parse_unary())), 1119 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt(this=self._parse_unary())), 1120 } 1121 1122 STRING_PARSERS: t.ClassVar = { 1123 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 1124 exp.RawString(this=token.text), token 1125 ), 1126 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 1127 exp.National(this=token.text), token 1128 ), 1129 TokenType.RAW_STRING: lambda self, token: self.expression( 1130 exp.RawString(this=token.text), token 1131 ), 1132 TokenType.STRING: lambda self, token: self.expression( 1133 exp.Literal(this=token.text, is_string=True), token 1134 ), 1135 TokenType.UNICODE_STRING: lambda self, token: self.expression( 1136 exp.UnicodeString( 1137 this=token.text, escape=self._match_text_seq("UESCAPE") and self._parse_string() 1138 ), 1139 token, 1140 ), 1141 } 1142 1143 NUMERIC_PARSERS: t.ClassVar = { 1144 TokenType.BIT_STRING: lambda self, token: self.expression( 1145 exp.BitString(this=token.text), token 1146 ), 1147 TokenType.BYTE_STRING: lambda self, token: self.expression( 1148 exp.ByteString( 1149 this=token.text, is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None 1150 ), 1151 token, 1152 ), 1153 TokenType.HEX_STRING: lambda self, token: self.expression( 1154 exp.HexString( 1155 this=token.text, is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None 1156 ), 1157 token, 1158 ), 1159 TokenType.NUMBER: lambda self, token: self.expression( 1160 exp.Literal(this=token.text, 
is_string=False), token 1161 ), 1162 } 1163 1164 PRIMARY_PARSERS: t.ClassVar = { 1165 **STRING_PARSERS, 1166 **NUMERIC_PARSERS, 1167 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 1168 TokenType.NULL: lambda self, _: self.expression(exp.Null()), 1169 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean(this=True)), 1170 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean(this=False)), 1171 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 1172 TokenType.STAR: lambda self, _: self._parse_star_ops(), 1173 } 1174 1175 PLACEHOLDER_PARSERS: t.ClassVar = { 1176 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder()), 1177 TokenType.PARAMETER: lambda self: self._parse_parameter(), 1178 TokenType.COLON: lambda self: ( 1179 self.expression(exp.Placeholder(this=self._prev.text)) 1180 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 1181 else None 1182 ), 1183 } 1184 1185 RANGE_PARSERS: t.ClassVar = { 1186 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 1187 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 1188 TokenType.GLOB: binary_range_parser(exp.Glob), 1189 TokenType.ILIKE: binary_range_parser(exp.ILike), 1190 TokenType.IN: lambda self, this: self._parse_in(this), 1191 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 1192 TokenType.IS: lambda self, this: self._parse_is(this), 1193 TokenType.LIKE: binary_range_parser(exp.Like), 1194 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 1195 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 1196 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 1197 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 1198 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 1199 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 1200 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 1201 TokenType.HASH_DASH: 
binary_range_parser(exp.JSONBDeleteAtPath), 1202 TokenType.ADJACENT: binary_range_parser(exp.Adjacent), 1203 TokenType.OPERATOR: lambda self, this: self._parse_operator(this), 1204 TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft), 1205 TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight), 1206 } 1207 1208 PIPE_SYNTAX_TRANSFORM_PARSERS: t.ClassVar = { 1209 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 1210 "AS": lambda self, query: self._build_pipe_cte( 1211 query, [exp.Star()], self._parse_table_alias() 1212 ), 1213 "DISTINCT": lambda self, query: self._advance() or query.distinct(copy=False), 1214 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 1215 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 1216 "ORDER BY": lambda self, query: query.order_by( 1217 self._parse_order(), append=False, copy=False 1218 ), 1219 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 1220 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 1221 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 1222 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 1223 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 1224 } 1225 1226 PROPERTY_PARSERS: t.ClassVar[dict[str, t.Callable]] = { 1227 "ALLOWED_VALUES": lambda self: self.expression( 1228 exp.AllowedValuesProperty(expressions=self._parse_csv(self._parse_primary)) 1229 ), 1230 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 1231 "AUTO": lambda self: self._parse_auto_property(), 1232 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 1233 "BACKUP": lambda self: self.expression( 1234 exp.BackupProperty(this=self._parse_var(any_token=True)) 1235 ), 1236 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 1237 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 1238 
"CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 1239 "CHECKSUM": lambda self: self._parse_checksum(), 1240 "CLUSTER BY": lambda self: self._parse_cluster(), 1241 "CLUSTERED": lambda self: self._parse_clustered_by(), 1242 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 1243 exp.CollateProperty, **kwargs 1244 ), 1245 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 1246 "CONTAINS": lambda self: self._parse_contains_property(), 1247 "COPY": lambda self: self._parse_copy_property(), 1248 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 1249 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 1250 "DEFINER": lambda self: self._parse_definer(), 1251 "DETERMINISTIC": lambda self: self.expression( 1252 exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE")) 1253 ), 1254 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 1255 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 1256 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty()), 1257 "DISTKEY": lambda self: self._parse_distkey(), 1258 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1259 "EMPTY": lambda self: self.expression(exp.EmptyProperty()), 1260 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1261 "ENVIRONMENT": lambda self: self.expression( 1262 exp.EnviromentProperty(expressions=self._parse_wrapped_csv(self._parse_assignment)) 1263 ), 1264 "HANDLER": lambda self: self._parse_property_assignment(exp.HandlerProperty), 1265 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1266 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty()), 1267 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1268 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1269 "FREESPACE": lambda self: 
self._parse_freespace(), 1270 "GLOBAL": lambda self: self.expression(exp.GlobalProperty()), 1271 "HEAP": lambda self: self.expression(exp.HeapProperty()), 1272 "ICEBERG": lambda self: self.expression(exp.IcebergProperty()), 1273 "IMMUTABLE": lambda self: self.expression( 1274 exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE")) 1275 ), 1276 "INHERITS": lambda self: self.expression( 1277 exp.InheritsProperty(expressions=self._parse_wrapped_csv(self._parse_table)) 1278 ), 1279 "INPUT": lambda self: self.expression(exp.InputModelProperty(this=self._parse_schema())), 1280 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1281 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1282 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1283 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1284 "LIKE": lambda self: self._parse_create_like(), 1285 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1286 "LOCK": lambda self: self._parse_locking(), 1287 "LOCKING": lambda self: self._parse_locking(), 1288 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1289 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty()), 1290 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1291 "MODIFIES": lambda self: self._parse_modifies_property(), 1292 "MULTISET": lambda self: self.expression(exp.SetProperty(multi=True)), 1293 "NO": lambda self: self._parse_no_property(), 1294 "ON": lambda self: self._parse_on_property(), 1295 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1296 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty(this=self._parse_schema())), 1297 "PARTITION": lambda self: self._parse_partitioned_of(), 1298 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1299 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1300 "PARTITIONED_BY": lambda self: 
self._parse_partitioned_by(), 1301 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1302 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1303 "READS": lambda self: self._parse_reads_property(), 1304 "REMOTE": lambda self: self._parse_remote_with_connection(), 1305 "RETURNS": lambda self: self._parse_returns(), 1306 "STRICT": lambda self: self.expression(exp.StrictProperty()), 1307 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty()), 1308 "ROW": lambda self: self._parse_row(), 1309 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1310 "SAMPLE": lambda self: self.expression( 1311 exp.SampleProperty(this=self._match_text_seq("BY") and self._parse_bitwise()) 1312 ), 1313 "SECURE": lambda self: self.expression(exp.SecureProperty()), 1314 "SECURITY": lambda self: self._parse_sql_security(), 1315 "SQL SECURITY": lambda self: self._parse_sql_security(), 1316 "SET": lambda self: self.expression(exp.SetProperty(multi=False)), 1317 "SETTINGS": lambda self: self._parse_settings_property(), 1318 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1319 "SORTKEY": lambda self: self._parse_sortkey(), 1320 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1321 "STABLE": lambda self: self.expression( 1322 exp.StabilityProperty(this=exp.Literal.string("STABLE")) 1323 ), 1324 "STORED": lambda self: self._parse_stored(), 1325 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1326 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1327 "TEMP": lambda self: self.expression(exp.TemporaryProperty()), 1328 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty()), 1329 "TO": lambda self: self._parse_to_table(), 1330 "TRANSIENT": lambda self: self.expression(exp.TransientProperty()), 1331 "TRANSFORM": lambda self: self.expression( 1332 
exp.TransformModelProperty(expressions=self._parse_wrapped_csv(self._parse_expression)) 1333 ), 1334 "TTL": lambda self: self._parse_ttl(), 1335 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1336 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty()), 1337 "VOLATILE": lambda self: self._parse_volatile_property(), 1338 "WITH": lambda self: self._parse_with_property(), 1339 } 1340 1341 CONSTRAINT_PARSERS: t.ClassVar = { 1342 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1343 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1344 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint(not_=False)), 1345 "CHARACTER SET": lambda self: self.expression( 1346 exp.CharacterSetColumnConstraint(this=self._parse_var_or_string()) 1347 ), 1348 "CHECK": lambda self: self._parse_check_constraint(), 1349 "COLLATE": lambda self: self.expression( 1350 exp.CollateColumnConstraint(this=self._parse_identifier() or self._parse_column()) 1351 ), 1352 "COMMENT": lambda self: self.expression( 1353 exp.CommentColumnConstraint(this=self._parse_string()) 1354 ), 1355 "COMPRESS": lambda self: self._parse_compress(), 1356 "CLUSTERED": lambda self: self.expression( 1357 exp.ClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered)) 1358 ), 1359 "NONCLUSTERED": lambda self: self.expression( 1360 exp.NonClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered)) 1361 ), 1362 "DEFAULT": lambda self: self.expression( 1363 exp.DefaultColumnConstraint(this=self._parse_bitwise()) 1364 ), 1365 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint(this=self._parse_var())), 1366 "EPHEMERAL": lambda self: self.expression( 1367 exp.EphemeralColumnConstraint(this=self._parse_bitwise()) 1368 ), 1369 "EXCLUDE": lambda self: self.expression( 1370 exp.ExcludeColumnConstraint(this=self._parse_index_params()) 1371 ), 1372 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1373 
def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expr | None:
    """Parse a `BUCKET(...)` / `TRUNCATE(...)` partition transform.

    The keyword itself has already been consumed (it is read back from
    `self._prev`). Partitioning by bucket or truncate follows the syntax
    `PARTITION BY (BUCKET(..) | TRUNCATE(..))`, so when the keyword is not
    followed by an opening parenthesis the parser steps back one token and
    returns `None`, letting the keyword be parsed as an identifier instead.

    Returns:
        An `exp.PartitionedByBucket` or `exp.PartitionByTruncate` node whose
        `this` is the column and whose `expression` is the literal argument,
        or `None` when no parenthesis follows the keyword.
    """
    followed_by_paren = self._match(TokenType.L_PAREN, advance=False)
    if not followed_by_paren:
        # Un-consume the keyword so that the caller can retry it as an identifier.
        self._retreat(self._index - 1)
        return None

    # The keyword must be inspected before the arguments are parsed, because
    # parsing moves `self._prev` forward.
    if self._prev.text.upper() == "BUCKET":
        transform_cls = exp.PartitionedByBucket
    else:
        transform_cls = exp.PartitionByTruncate

    def _parse_argument():
        return self._parse_primary() or self._parse_column()

    arguments = self._parse_wrapped_csv(_parse_argument)
    first = seq_get(arguments, 0)
    second = seq_get(arguments, 1)

    # Iceberg partition transforms (bucket / truncate) come in two argument orders:
    # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
    # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
    # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`.
    #
    # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
    # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
    if isinstance(first, exp.Literal):
        column, literal = second, first
    else:
        column, literal = first, second

    return self.expression(transform_cls(this=column, expression=literal))
{ 1450 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1451 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1452 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1453 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1454 } 1455 1456 SCHEMA_UNNAMED_CONSTRAINTS: t.ClassVar = { 1457 "CHECK", 1458 "EXCLUDE", 1459 "FOREIGN KEY", 1460 "LIKE", 1461 "PERIOD", 1462 "PRIMARY KEY", 1463 "UNIQUE", 1464 "BUCKET", 1465 "TRUNCATE", 1466 } 1467 1468 NO_PAREN_FUNCTION_PARSERS: t.ClassVar = { 1469 "ANY": lambda self: self.expression(exp.Any(this=self._parse_bitwise())), 1470 "CASE": lambda self: self._parse_case(), 1471 "CONNECT_BY_ROOT": lambda self: self.expression( 1472 exp.ConnectByRoot(this=self._parse_column()) 1473 ), 1474 "IF": lambda self: self._parse_if(), 1475 } 1476 1477 INVALID_FUNC_NAME_TOKENS: t.ClassVar = { 1478 TokenType.IDENTIFIER, 1479 TokenType.STRING, 1480 } 1481 1482 FUNCTIONS_WITH_ALIASED_ARGS: t.ClassVar = {"STRUCT"} 1483 1484 KEY_VALUE_DEFINITIONS: t.ClassVar = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1485 1486 FUNCTION_PARSERS: t.ClassVar[dict[str, t.Callable]] = { 1487 **{ 1488 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1489 }, 1490 **{ 1491 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1492 }, 1493 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1494 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1495 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1496 "CHAR": lambda self: self._parse_char(), 1497 "CHR": lambda self: self._parse_char(), 1498 "DECODE": lambda self: self._parse_decode(), 1499 "EXTRACT": lambda self: self._parse_extract(), 1500 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1501 "GAP_FILL": lambda self: self._parse_gap_fill(), 1502 "INITCAP": lambda self: self._parse_initcap(), 1503 "JSON_OBJECT": lambda self: self._parse_json_object(), 1504 "JSON_OBJECTAGG": lambda 
self: self._parse_json_object(agg=True), 1505 "JSON_TABLE": lambda self: self._parse_json_table(), 1506 "MATCH": lambda self: self._parse_match_against(), 1507 "NORMALIZE": lambda self: self._parse_normalize(), 1508 "OPENJSON": lambda self: self._parse_open_json(), 1509 "OVERLAY": lambda self: self._parse_overlay(), 1510 "POSITION": lambda self: self._parse_position(), 1511 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1512 "STRING_AGG": lambda self: self._parse_string_agg(), 1513 "SUBSTRING": lambda self: self._parse_substring(), 1514 "TRIM": lambda self: self._parse_trim(), 1515 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1516 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1517 "XMLELEMENT": lambda self: self._parse_xml_element(), 1518 "XMLTABLE": lambda self: self._parse_xml_table(), 1519 } 1520 1521 QUERY_MODIFIER_PARSERS: t.ClassVar = { 1522 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1523 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1524 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1525 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1526 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1527 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1528 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1529 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1530 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1531 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1532 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1533 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1534 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1535 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1536 TokenType.USING: lambda self: ("sample", 
# Keyword sequences describing transaction characteristics. Each key is the
# leading keyword; each value lists the allowed continuations, where a
# continuation is either a single keyword or a sequence of keywords.
#
# NOTE: the isolation level is spelled "READ UNCOMMITTED" (double "M") in
# standard SQL. The earlier spelling "UNCOMITTED" meant the correctly spelled
# keyword sequence was missing from this table.
TRANSACTION_CHARACTERISTICS: t.ClassVar[OPTIONS_TYPE] = {
    "ISOLATION": (
        ("LEVEL", "REPEATABLE", "READ"),
        ("LEVEL", "READ", "COMMITTED"),
        ("LEVEL", "READ", "UNCOMMITTED"),
        ("LEVEL", "SERIALIZABLE"),
    ),
    "READ": ("WRITE", "ONLY"),
}
# Keyword option tables. Every table maps a leading keyword to the keyword
# continuations that may follow it; an empty tuple means the leading keyword
# stands on its own.

TRIGGER_TIMING: t.ClassVar[OPTIONS_TYPE] = {
    "INSTEAD": (("OF",),),
    "BEFORE": (),
    "AFTER": (),
}

TRIGGER_DEFERRABLE: t.ClassVar[OPTIONS_TYPE] = {
    "NOT": (("DEFERRABLE",),),
    "DEFERRABLE": (),
}

CREATE_SEQUENCE: t.ClassVar[OPTIONS_TYPE] = {
    "SCALE": ("EXTEND", "NOEXTEND"),
    "SHARD": ("EXTEND", "NOEXTEND"),
    "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
    # Stand-alone keywords.
    **{
        keyword: ()
        for keyword in (
            "SESSION",
            "GLOBAL",
            "KEEP",
            "NOKEEP",
            "ORDER",
            "NOORDER",
            "NOCACHE",
            "CYCLE",
            "NOCYCLE",
            "NOMINVALUE",
            "NOMAXVALUE",
            "NOSCALE",
            "NOSHARD",
        )
    },
}

ISOLATED_LOADING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {"FOR": ("ALL", "INSERT", "NONE")}

USABLES: t.ClassVar[OPTIONS_TYPE] = {
    "ROLE": (),
    "WAREHOUSE": (),
    "DATABASE": (),
    "SCHEMA": (),
    "CATALOG": (),
}

CAST_ACTIONS: t.ClassVar[OPTIONS_TYPE] = {
    "RENAME": ("FIELDS",),
    "ADD": ("FIELDS",),
}

SCHEMA_BINDING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
    "TYPE": ("EVOLUTION",),
    "BINDING": (),
    "COMPENSATION": (),
    "EVOLUTION": (),
}

PROCEDURE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {}

EXECUTE_AS_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
    "CALLER": (),
    "SELF": (),
    "OWNER": (),
}

KEY_CONSTRAINT_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
    "NOT": ("ENFORCED",),
    "MATCH": ("FULL", "PARTIAL", "SIMPLE"),
    "INITIALLY": ("DEFERRED", "IMMEDIATE"),
    "USING": ("BTREE", "HASH"),
    "DEFERRABLE": (),
    "NORELY": (),
    "RELY": (),
}

WINDOW_EXCLUDE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
    "NO": ("OTHERS",),
    "CURRENT": ("ROW",),
    "GROUP": (),
    "TIES": (),
}
"REPLACE", "ROLLBACK"} 1662 1663 CLONE_KEYWORDS: t.ClassVar = {"CLONE", "COPY"} 1664 HISTORICAL_DATA_PREFIX: t.ClassVar = {"AT", "BEFORE", "END"} 1665 HISTORICAL_DATA_KIND: t.ClassVar = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1666 1667 OPCLASS_FOLLOW_KEYWORDS: t.ClassVar = {"ASC", "DESC", "NULLS", "WITH"} 1668 1669 OPTYPE_FOLLOW_TOKENS: t.ClassVar = {TokenType.COMMA, TokenType.R_PAREN} 1670 1671 TABLE_INDEX_HINT_TOKENS: t.ClassVar = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1672 1673 VIEW_ATTRIBUTES: t.ClassVar = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1674 1675 WINDOW_ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS} 1676 WINDOW_BEFORE_PAREN_TOKENS: t.ClassVar = {TokenType.OVER} 1677 WINDOW_SIDES: t.ClassVar = {"FOLLOWING", "PRECEDING"} 1678 1679 JSON_KEY_VALUE_SEPARATOR_TOKENS: t.ClassVar = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1680 1681 FETCH_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1682 1683 ADD_CONSTRAINT_TOKENS: t.ClassVar = { 1684 TokenType.CONSTRAINT, 1685 TokenType.FOREIGN_KEY, 1686 TokenType.INDEX, 1687 TokenType.KEY, 1688 TokenType.PRIMARY_KEY, 1689 TokenType.UNIQUE, 1690 } 1691 1692 DISTINCT_TOKENS: t.ClassVar = {TokenType.DISTINCT} 1693 1694 UNNEST_OFFSET_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1695 1696 SELECT_START_TOKENS: t.ClassVar = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1697 1698 COPY_INTO_VARLEN_OPTIONS: t.ClassVar = { 1699 "FILE_FORMAT", 1700 "COPY_OPTIONS", 1701 "FORMAT_OPTIONS", 1702 "CREDENTIAL", 1703 } 1704 1705 IS_JSON_PREDICATE_KIND: t.ClassVar = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1706 1707 ODBC_DATETIME_LITERALS: t.ClassVar[dict[str, type[exp.Expr]]] = {} 1708 1709 ON_CONDITION_TOKENS: t.ClassVar = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1710 1711 PRIVILEGE_FOLLOW_TOKENS: t.ClassVar = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1712 1713 # The style options for the 
DESCRIBE statement 1714 DESCRIBE_STYLES: t.ClassVar = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1715 1716 SET_ASSIGNMENT_DELIMITERS: t.ClassVar = {"=", ":=", "TO"} 1717 1718 # The style options for the ANALYZE statement 1719 ANALYZE_STYLES: t.ClassVar = { 1720 "BUFFER_USAGE_LIMIT", 1721 "FULL", 1722 "LOCAL", 1723 "NO_WRITE_TO_BINLOG", 1724 "SAMPLE", 1725 "SKIP_LOCKED", 1726 "VERBOSE", 1727 } 1728 1729 ANALYZE_EXPRESSION_PARSERS: t.ClassVar = { 1730 "ALL": lambda self: self._parse_analyze_columns(), 1731 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1732 "DELETE": lambda self: self._parse_analyze_delete(), 1733 "DROP": lambda self: self._parse_analyze_histogram(), 1734 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1735 "LIST": lambda self: self._parse_analyze_list(), 1736 "PREDICATE": lambda self: self._parse_analyze_columns(), 1737 "UPDATE": lambda self: self._parse_analyze_histogram(), 1738 "VALIDATE": lambda self: self._parse_analyze_validate(), 1739 } 1740 1741 PARTITION_KEYWORDS: t.ClassVar = {"PARTITION", "SUBPARTITION"} 1742 1743 AMBIGUOUS_ALIAS_TOKENS: t.ClassVar = (TokenType.LIMIT, TokenType.OFFSET) 1744 1745 OPERATION_MODIFIERS: t.ClassVar[set[str]] = set() 1746 1747 RECURSIVE_CTE_SEARCH_KIND: t.ClassVar = {"BREADTH", "DEPTH", "CYCLE"} 1748 1749 SECURITY_PROPERTY_KEYWORDS: t.ClassVar = {"DEFINER", "INVOKER", "NONE"} 1750 1751 MODIFIABLES: t.ClassVar = (exp.Query, exp.Table, exp.TableFromRows, exp.Values) 1752 1753 STRICT_CAST: t.ClassVar = True 1754 1755 PREFIXED_PIVOT_COLUMNS: t.ClassVar = False 1756 IDENTIFY_PIVOT_STRINGS: t.ClassVar = False 1757 1758 LOG_DEFAULTS_TO_LN: t.ClassVar = False 1759 1760 # Whether the table sample clause expects CSV syntax 1761 TABLESAMPLE_CSV: t.ClassVar = False 1762 1763 # The default method used for table sampling 1764 DEFAULT_SAMPLING_METHOD: t.ClassVar[str | None] = None 1765 1766 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1767 SET_REQUIRES_ASSIGNMENT_DELIMITER: t.ClassVar = True 1768 1769 # Whether the TRIM function expects the characters to trim as its first argument 1770 TRIM_PATTERN_FIRST: t.ClassVar = False 1771 1772 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1773 STRING_ALIASES: t.ClassVar = False 1774 1775 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1776 MODIFIERS_ATTACHED_TO_SET_OP: t.ClassVar = True 1777 SET_OP_MODIFIERS: t.ClassVar = {"order", "limit", "offset"} 1778 1779 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1780 NO_PAREN_IF_COMMANDS: t.ClassVar = True 1781 1782 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1783 JSON_ARROWS_REQUIRE_JSON_TYPE: t.ClassVar = False 1784 1785 # Whether the `:` operator is used to extract a value from a VARIANT column 1786 COLON_IS_VARIANT_EXTRACT: t.ClassVar = False 1787 1788 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1789 # If this is True and '(' is not found, the keyword will be treated as an identifier 1790 VALUES_FOLLOWED_BY_PAREN: t.ClassVar = True 1791 1792 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1793 SUPPORTS_IMPLICIT_UNNEST: t.ClassVar = False 1794 1795 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1796 INTERVAL_SPANS: t.ClassVar = True 1797 1798 # Whether a PARTITION clause can follow a table reference 1799 SUPPORTS_PARTITION_SELECTION: t.ClassVar = False 1800 1801 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1802 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT: t.ClassVar = True 1803 1804 # Whether the 'AS' keyword is optional in the CTE definition syntax 1805 OPTIONAL_ALIAS_TOKEN_CTE: t.ClassVar = True 1806 1807 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1808 ALTER_RENAME_REQUIRES_COLUMN: t.ClassVar = True 1809 1810 # Whether Alter statements are allowed to contain Partition specifications 1811 ALTER_TABLE_PARTITIONS: t.ClassVar = False 1812 1813 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1814 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1815 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1816 # as BigQuery, where all joins have the same precedence. 1817 JOINS_HAVE_EQUAL_PRECEDENCE: t.ClassVar = False 1818 1819 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1820 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR: t.ClassVar = False 1821 1822 # Whether map literals support arbitrary expressions as keys. 1823 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1824 # When False, keys are typically restricted to identifiers. 
MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: t.ClassVar = False

# Whether JSON_EXTRACT requires its first argument to be a JSON expression. This is
# the case for Snowflake, but not for BigQuery, which can also process strings
JSON_EXTRACT_REQUIRES_JSON_EXPRESSION: t.ClassVar = False

# Dialects like Databricks support JOINs without join criteria. Adding an explicit
# ON TRUE makes the transpiled SQL semantically correct for other dialects
ADD_JOIN_ON_TRUE: t.ClassVar = False

# Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
# can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT: t.ClassVar = False

# Tries built from the space-separated words of every SHOW_PARSERS / SET_PARSERS key
SHOW_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SHOW_PARSERS)
SET_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SET_PARSERS)

def __init__(
    self,
    error_level: ErrorLevel | None = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    max_nodes: int = -1,
    dialect: DialectType = None,
):
    """
    Stores the parser configuration and initializes an empty parsing state.

    Args:
        error_level: How errors are handled; a falsy value selects ErrorLevel.IMMEDIATE.
        error_message_context: Context length used when formatting error messages.
        max_errors: Limit passed to `concat_messages` when errors are raised together.
        max_nodes: Maximum number of validated AST nodes; -1 disables the check.
        dialect: The dialect (or dialect name) resolved through `_resolve_dialect`.
    """
    # Configuration
    self.error_level: ErrorLevel = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context: int = error_message_context
    self.max_errors: int = max_errors
    self.max_nodes: int = max_nodes
    self.dialect: t.Any = _resolve_dialect(dialect)

    # Input and collected errors
    self.sql: str = ""
    self.errors: list[ParseError] = []

    # Token cursor over the chunk that is currently being parsed
    self._tokens: list[Token] = []
    self._tokens_size: i64 = 0
    self._index: i64 = 0
    self._curr: Token = SENTINEL_NONE
    self._next: Token = SENTINEL_NONE
    self._prev: Token = SENTINEL_NONE
    self._prev_comments: list[str] = []

    # Counters and statement chunks
    self._pipe_cte_counter: int = 0
    self._chunks: list[list[Token]] = []
    self._chunk_index: i64 = 0
    self._node_count: int = 0

def reset(self) -> None:
    """Clears all per-parse state (input, errors, cursor, chunks and counters)."""
    self.sql = ""

    # Each container gets its own fresh list
    self.errors = []
    self._tokens = []
    self._prev_comments = []
    self._chunks = []

    self._tokens_size = self._index = self._chunk_index = 0
    self._pipe_cte_counter = self._node_count = 0
    self._curr = self._next = self._prev = SENTINEL_NONE

def _advance(self, times: i64 = 1) -> None:
    """
    Moves the cursor by `times` tokens and refreshes the current, next and previous tokens.

    Positions past the end of the token list yield SENTINEL_NONE. The comments of
    the new previous token become the pending `_prev_comments`.
    """
    position = self._index + times
    self._index = position
    stream = self._tokens
    limit = self._tokens_size

    if position < limit:
        self._curr = stream[position]
    else:
        self._curr = SENTINEL_NONE

    lookahead = position + 1
    if lookahead < limit:
        self._next = stream[lookahead]
    else:
        self._next = SENTINEL_NONE

    if position > 0:
        previous = stream[position - 1]
        self._prev = previous
        self._prev_comments = previous.comments
        return

    self._prev = SENTINEL_NONE
    self._prev_comments = []

def _advance_chunk(self) -> None:
    """Makes the next chunk the active token list and places the cursor on its first token."""
    # Starting at -1 lets the single `_advance()` below land on index 0
    self._index = -1
    chunk = self._chunks[self._chunk_index]
    self._tokens = chunk
    self._tokens_size = i64(len(chunk))
    self._chunk_index += 1
    self._advance()

def _retreat(self, index: i64) -> None:
    """Moves the cursor to the absolute position `index`; does nothing if already there."""
    offset = index - self._index
    if offset:
        self._advance(offset)

def _add_comments(self, expression: exp.Expr | None) -> None:
    """Attaches the pending previous-token comments to `expression` and clears them."""
    if not expression or not self._prev_comments:
        return

    expression.add_comments(self._prev_comments)
    self._prev_comments = []

def _match(
    self, token_type: TokenType, advance: bool = True, expression: exp.Expr | None = None
) -> bool:
    """
    Checks whether the current token has the given type.

    On a match the token is consumed (unless `advance` is False) and any pending
    comments are attached to `expression`, if one is given.
    """
    if self._curr.token_type != token_type:
        return False

    if advance:
        self._advance()
    self._add_comments(expression)
    return True

def _match_set(self, types: t.Collection[TokenType], advance: bool = True) -> bool:
    """Checks whether the current token's type is in `types`, consuming it unless `advance` is False."""
    if self._curr.token_type not in types:
        return False

    if advance:
        self._advance()
    return True

def _match_pair(
    self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
) -> bool:
    """Checks whether the current and next tokens have the given types, consuming both unless `advance` is False."""
    if self._curr.token_type != token_type_a or self._next.token_type != token_type_b:
        return False

    if advance:
        self._advance(2)
    return True
def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool:
    """
    Checks whether the upper-cased text of the current token is in `texts`.

    String literal tokens never match. The token is consumed unless `advance` is False.
    """
    if self._curr.token_type != TokenType.STRING and self._curr.text.upper() in texts:
        if advance:
            self._advance()
        return True
    return False

def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
    """
    Checks whether the upcoming tokens spell out `texts`, in order.

    Token text is upper-cased before the comparison and string literal tokens never
    match. On a mismatch the cursor is restored and False is returned. With
    `advance=False` the cursor is restored on success too, i.e. this is a lookahead.
    """
    index = self._index
    string_type = TokenType.STRING
    for text in texts:
        if self._curr.token_type != string_type and self._curr.text.upper() == text:
            self._advance()
        else:
            self._retreat(index)
            return False

    if not advance:
        self._retreat(index)

    return True

def _is_connected(self) -> bool:
    """Whether the current token starts right after the previous token ends."""
    prev = self._prev
    curr = self._curr
    return bool(prev and curr and prev.end + 1 == curr.start)

def _find_sql(self, start: Token, end: Token) -> str:
    """Returns the slice of `self.sql` from the start of `start` through the end of `end`, inclusive."""
    return self.sql[start.start : end.end + 1]

def raise_error(self, message: str, token: Token = SENTINEL_NONE) -> None:
    """
    Records a parse error, raising it right away when the error level is IMMEDIATE.

    Args:
        message: The error description.
        token: The token to report. Falls back to the current token, then the
            previous one, then an empty string token.

    Raises:
        ParseError: If the error level is ErrorLevel.IMMEDIATE; otherwise the
            error is appended to `self.errors`.
    """
    token = token or self._curr or self._prev or Token.string("")
    formatted_sql, start_context, highlight, end_context = highlight_sql(
        sql=self.sql,
        positions=[(token.start, token.end)],
        context_length=self.error_message_context,
    )
    # NOTE(review): whitespace is collapsed in the view this was taken from, so the
    # exact spacing after "\n" should be confirmed against the real file
    formatted_message = f"{message}. Line {token.line}, Col: {token.col}.\n {formatted_sql}"

    error = ParseError.new(
        formatted_message,
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)

def validate_expression(self, expression: E, args: list | None = None) -> E:
    """
    Enforces the node budget and reports the expression's own validation errors.

    Args:
        expression: The expression to validate.
        args: Forwarded to `expression.error_messages`.

    Returns:
        The same expression.
    """
    # A max_nodes of -1 disables the node budget
    if self.max_nodes > -1:
        self._node_count += 1
        if self._node_count > self.max_nodes:
            self.raise_error(f"Maximum number of AST nodes ({self.max_nodes}) exceeded")
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)
    return expression

def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> T | None:
    """
    Attempts to parse with `parse_method`, undoing token consumption on failure.

    The error level is switched to IMMEDIATE for the duration of the call, so that
    errors surface as ParseError, and is restored afterwards.

    Args:
        parse_method: The zero-argument parsing callable to try.
        retreat: Whether to restore the cursor even if parsing succeeds.

    Returns:
        The parsed value, or None if a ParseError was raised.
    """
    index = self._index
    error_level = self.error_level
    this: T | None = None

    self.error_level = ErrorLevel.IMMEDIATE
    try:
        this = parse_method()
    except ParseError:
        this = None
    finally:
        # A falsy result is treated like a failure: the cursor goes back to where it was
        if not this or retreat:
            self._retreat(index)
        self.error_level = error_level

    return this

def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )

def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: list[Token],
    sql: str | None = None,
) -> list[exp.Expr | None]:
    """
    Parses a list of tokens into a given Expr type. If a collection of Expr
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expr.

    Raises:
        TypeError: If no parser is registered for one of the expression types.
        ParseError: If parsing failed for every expression type that was tried.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(t.cast(type[exp.Expr], expression_type))
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Remember which target type this failure belongs to
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]

def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )

def expression(
    self,
    instance: E,
    token: Token | None = None,
    comments: list[str] | None = None,
) -> E:
    """
    Finalizes a newly built expression.

    Args:
        instance: The expression to finalize.
        token: If given, the expression's positions are updated from it.
        comments: Comments to attach. If omitted, the pending comments of the
            previous token are attached instead.

    Returns:
        The expression, validated unless it is primitive.
    """
    if token:
        instance.update_positions(token)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    if not instance.is_primitive:
        instance = self.validate_expression(instance)
    return instance

def _parse_batch_statements(
    self,
    parse_method: t.Callable[[Parser], exp.Expr | None],
    sep_first_statement: bool = True,
) -> list[exp.Expr | None]:
    """
    Parses the remaining chunks, one statement per chunk.

    Args:
        parse_method: The parsing method applied to each chunk.
        sep_first_statement: Whether a first statement is parsed from the current
            chunk (after an optional BEGIN) before moving on to the next chunks.

    Returns:
        The parsed statements. Parsing stops early, without consuming it, when a
        chunk starts with ELSE.
    """
    expressions = []

    # Chunkification binds if/while statements with the first statement of the body
    if sep_first_statement:
        self._match(TokenType.BEGIN)
        expressions.append(parse_method(self))

    chunks_length = len(self._chunks)
    while self._chunk_index < chunks_length:
        self._advance_chunk()

        if self._match(TokenType.ELSE, advance=False):
            return expressions

        # A chunk consisting only of END (no next token) becomes an EndStatement
        if expressions and not self._next and self._match(TokenType.END):
            expressions.append(exp.EndStatement())
            continue

        expressions.append(parse_method(self))

        # Tokens left over in the chunk mean the statement was not fully parsed
        if self._index < self._tokens_size:
            self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

    return expressions

def _parse(
    self,
    parse_method: t.Callable[[Parser], exp.Expr | None],
    raw_tokens: list[Token],
    sql: str | None = None,
) -> list[exp.Expr | None]:
    """
    Resets the parser, splits the tokens into chunks on semicolons and parses each chunk.

    Args:
        parse_method: The parsing method applied to each chunk.
        raw_tokens: The list of tokens.
        sql: The original SQL string.

    Returns:
        The list of the produced syntax trees.
    """
    self.reset()
    self.sql = sql or ""

    total = len(raw_tokens)
    chunks: list[list[Token]] = [[]]

    for i, token in enumerate(raw_tokens):
        if token.token_type == TokenType.SEMICOLON:
            # A semicolon carrying comments gets a chunk of its own
            if token.comments:
                chunks.append([token])

            # A trailing semicolon doesn't open a new (empty) chunk
            if i < total - 1:
                chunks.append([])
        else:
            chunks[-1].append(token)

    self._chunks = chunks

    return self._parse_batch_statements(parse_method=parse_method, sep_first_statement=False)

def _warn_unsupported(self) -> None:
    """Logs a warning that the current chunk is being parsed as a Command."""
    if self._tokens_size <= 1:
        return

    # We use _find_sql because self.sql may comprise multiple chunks, and we're only
    # interested in emitting a warning for the one being currently processed.
    sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

    logger.warning(
        f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
    )

def _parse_command(self) -> exp.Command:
    """Builds a Command from the previous token's upper-cased text and the string that follows."""
    self._warn_unsupported()
    # Captured before `_parse_string` advances and replaces the pending comments
    comments = self._prev_comments
    return self.expression(
        exp.Command(this=self._prev.text.upper(), expression=self._parse_string()),
        comments=comments,
    )

def _parse_comment(self, allow_exists: bool = True) -> exp.Expr:
    """
    Parses a COMMENT [IF EXISTS] ON <kind> <object> IS <string> statement.

    Falls back to a Command when the object kind is not one of `self.CREATABLES`.
    """
    start = self._prev
    exists = self._parse_exists() if allow_exists else None

    self._match(TokenType.ON)

    materialized = self._match_text_seq("MATERIALIZED")
    kind = self._match_set(self.CREATABLES) and self._prev
    if not kind:
        return self._parse_as_command(start)

    if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        this = self._parse_user_defined_function(kind=kind.token_type)
    elif kind.token_type == TokenType.TABLE:
        this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
    elif kind.token_type == TokenType.COLUMN:
        this = self._parse_column()
    else:
        this = self._parse_id_var()

    self._match(TokenType.IS)

    return self.expression(
        exp.Comment(
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )
    )

def _parse_to_table(
    self,
) -> exp.ToTableProperty:
    """Parses the table reference of a ToTableProperty."""
    table = self._parse_table_parts(schema=True)
    return self.expression(exp.ToTableProperty(this=table))

# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
def _parse_ttl(self) -> exp.Expr:
    """Parses a MergeTree TTL clause: comma-separated TTL actions, then optional WHERE, GROUP BY and SET parts."""

    def _parse_ttl_action() -> exp.Expr | None:
        # A TTL expression optionally followed by DELETE, RECOMPRESS, TO DISK or TO VOLUME
        this = self._parse_bitwise()

        if self._match_text_seq("DELETE"):
            return self.expression(exp.MergeTreeTTLAction(this=this, delete=True))
        if self._match_text_seq("RECOMPRESS"):
            return self.expression(
                exp.MergeTreeTTLAction(this=this, recompress=self._parse_bitwise())
            )
        if self._match_text_seq("TO", "DISK"):
            return self.expression(
                exp.MergeTreeTTLAction(this=this, to_disk=self._parse_string())
            )
        if self._match_text_seq("TO", "VOLUME"):
            return self.expression(
                exp.MergeTreeTTLAction(this=this, to_volume=self._parse_string())
            )

        return this

    expressions = self._parse_csv(_parse_ttl_action)
    where = self._parse_where()
    group = self._parse_group()

    # SET aggregates are only parsed when a GROUP BY was found
    aggregates = None
    if group and self._match(TokenType.SET):
        aggregates = self._parse_csv(self._parse_set_item)

    return self.expression(
        exp.MergeTreeTTL(
            expressions=expressions, where=where, group=group, aggregates=aggregates
        )
    )

def _parse_condition(self) -> exp.Expr | None:
    """Parses an expression via `_parse_wrapped` with `optional=True`."""
    return self._parse_wrapped(parse_method=self._parse_expression, optional=True)

def _parse_block(self) -> exp.Block:
    """Parses a batch of statements into a Block."""
    return self.expression(
        exp.Block(
            expressions=self._parse_batch_statements(
                parse_method=lambda self: self._parse_statement()
            )
        )
    )

def _parse_whileblock(self) -> exp.WhileBlock:
    """Parses the condition and body of a WHILE block."""
    return self.expression(
        exp.WhileBlock(this=self._parse_condition(), body=self._parse_block())
    )

def _parse_statement(self) -> exp.Expr | None:
    """
    Parses a single statement starting at the current token.

    Tries, in order: a registered statement parser, a tokenizer-level command, a
    WHILE block, and finally an expression or SELECT with its set operations and
    query modifiers.

    Returns:
        The parsed statement, or None if there is no current token.
    """
    if not self._curr:
        return None

    if self._match_set(self.STATEMENT_PARSERS):
        # Captured before the statement parser advances and replaces the pending comments
        comments = self._prev_comments
        stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
        stmt.add_comments(comments, prepend=True)
        return stmt

    if self._match_set(self.dialect.tokenizer_class.COMMANDS):
        return self._parse_command()

    if self._match_text_seq("WHILE"):
        return self._parse_whileblock()

    expression = self._parse_expression()
    expression = self._parse_set_operations(expression) if expression else self._parse_select()

    if isinstance(expression, exp.Subquery) and self._match(TokenType.PIPE_GT, advance=False):
        expression = self._parse_pipe_syntax_query(expression)

    return self._parse_query_modifiers(expression)

def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
    """
    Parses a DROP statement.

    Args:
        exists: Whether the "exists" flag is already known to be set.

    Returns:
        A Drop expression, or a Command if the object kind is not in
        `self.CREATABLES` or ICEBERG is combined with a kind other than TABLE.
    """
    start = self._prev
    temporary = self._match(TokenType.TEMPORARY)
    materialized = self._match_text_seq("MATERIALIZED")
    iceberg = self._match_text_seq("ICEBERG")

    kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
    if not kind or (iceberg and kind and kind != "TABLE"):
        return self._parse_as_command(start)

    concurrently = self._match_text_seq("CONCURRENTLY")
    if_exists = exists or self._parse_exists()

    if kind == "COLUMN":
        this = self._parse_column()
    else:
        this = self._parse_table_parts(schema=True, is_db_reference=kind == "SCHEMA")

    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    if self._match(TokenType.L_PAREN, advance=False):
        expressions = self._parse_wrapped_csv(self._parse_types)
    else:
        expressions = None

    cascade_or_restrict = self._match_texts(("CASCADE", "RESTRICT")) and self._prev.text.upper()

    # Keyword arguments are evaluated top to bottom, so the trailing CONSTRAINTS,
    # PURGE and SYNC keywords are consumed in exactly that order
    return self.expression(
        exp.Drop(
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=cascade_or_restrict == "CASCADE",
            restrict=cascade_or_restrict == "RESTRICT",
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
            sync=self._match_text_seq("SYNC"),
            iceberg=iceberg,
        )
    )

def _parse_exists(self, not_: bool = False) -> bool | None:
    """Matches IF EXISTS, or IF NOT EXISTS when `not_` is True."""
    return (
        self._match_text_seq("IF")
        and (not not_ or self._match(TokenType.NOT))
        and self._match(TokenType.EXISTS)
    )

def _parse_create(self) -> exp.Create | exp.Command:
    """
    Parses a CREATE (or REPLACE) statement.

    Returns:
        A Create expression, or a Command when the statement can't be parsed
        completely (unknown object kind, malformed trigger or type definition,
        or unexpected trailing tokens).
    """
    # Note: this can't be None because we've matched a statement parser
    start = self._prev

    replace = (
        start.token_type == TokenType.REPLACE
        or self._match_pair(TokenType.OR, TokenType.REPLACE)
        or self._match_pair(TokenType.OR, TokenType.ALTER)
    )
    refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

    unique = self._match(TokenType.UNIQUE)

    if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
        clustered = True
    elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
        "COLUMNSTORE"
    ):
        clustered = False
    else:
        clustered = None

    # For TABLE FUNCTION, skip TABLE so that FUNCTION is matched as the creatable below
    if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
        self._advance()

    properties = None
    create_token = self._match_set(self.CREATABLES) and self._prev

    if not create_token:
        # exp.Properties.Location.POST_CREATE
        properties = self._parse_properties()
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

    create_token_type = t.cast(Token, create_token).token_type

    concurrently = self._match_text_seq("CONCURRENTLY")
    exists = self._parse_exists(not_=True)
    this = None
    expression: exp.Expr | None = None
    indexes = None
    no_schema_binding = None
    begin = None
    clone = None

    def extend_props(temp_props: exp.Properties | None) -> None:
        # Merges newly parsed properties into the ones collected so far
        nonlocal properties
        if properties and temp_props:
            properties.expressions.extend(temp_props.expressions)
        elif temp_props:
            properties = temp_props

    if create_token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        this = self._parse_user_defined_function(kind=create_token_type)

        # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
        extend_props(self._parse_properties())

        expression = self._parse_heredoc() if self._match(TokenType.ALIAS) else None
        extend_props(self._parse_function_properties())

        if not expression:
            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = (
                        self._parse_user_defined_function_expression()
                        if create_token_type == TokenType.FUNCTION
                        else self._parse_block()
                    )

                if return_:
                    expression = self.expression(exp.Return(this=expression))
    elif create_token_type == TokenType.INDEX:
        # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
        if not self._match(TokenType.ON):
            index = self._parse_id_var()
            anonymous = False
        else:
            index = None
            anonymous = True

        this = self._parse_index(index=index, anonymous=anonymous)
    elif (
        create_token_type == TokenType.CONSTRAINT and self._match(TokenType.TRIGGER)
    ) or create_token_type == TokenType.TRIGGER:
        # For CONSTRAINT TRIGGER, the TRIGGER token that was just matched becomes the create token
        if is_constraint := (create_token_type == TokenType.CONSTRAINT):
            create_token = self._prev

        trigger_name = self._parse_id_var()
        if not trigger_name:
            return self._parse_as_command(start)

        timing_var = self._parse_var_from_options(self.TRIGGER_TIMING, raise_unmatched=False)
        timing = timing_var.this if timing_var else None
        if not timing:
            return self._parse_as_command(start)

        events = self._parse_trigger_events()
        if not self._match(TokenType.ON):
            self.raise_error("Expected ON in trigger definition")

        table = self._parse_table_parts()
        referenced_table = self._parse_table_parts() if self._match(TokenType.FROM) else None
        deferrable, initially = self._parse_trigger_deferrable()
        referencing = self._parse_trigger_referencing()
        for_each = self._parse_trigger_for_each()
        when = self._match_text_seq("WHEN") and self._parse_wrapped(
            self._parse_disjunction, optional=True
        )
        execute = self._parse_trigger_execute()

        if execute is None:
            return self._parse_as_command(start)

        trigger_props = self.expression(
            exp.TriggerProperties(
                table=table,
                timing=timing,
                events=events,
                execute=execute,
                constraint=is_constraint,
                referenced_table=referenced_table,
                deferrable=deferrable,
                initially=initially,
                referencing=referencing,
                for_each=for_each,
                when=when,
            )
        )

        this = trigger_name
        extend_props(exp.Properties(expressions=[trigger_props] if trigger_props else []))
    elif create_token_type == TokenType.TYPE:
        this = self._parse_table_parts(schema=True)
        if not this or not self._match(TokenType.ALIAS):
            return self._parse_as_command(start)

        if self._match(TokenType.ENUM):
            expression = exp.DataType(
                this=exp.DType.ENUM,
                expressions=self._parse_wrapped_csv(self._parse_string),
            )
        elif self._match(TokenType.L_PAREN, advance=False):
            expression = self._parse_schema()
        else:
            return self._parse_as_command(start)
    elif create_token_type in self.DB_CREATABLES:
        table_parts = self._parse_table_parts(
            schema=True, is_db_reference=create_token_type == TokenType.SCHEMA
        )

        # exp.Properties.Location.POST_NAME
        self._match(TokenType.COMMA)
        extend_props(self._parse_properties(before=True))

        this = self._parse_schema(this=table_parts)

        # exp.Properties.Location.POST_SCHEMA and POST_WITH
        extend_props(self._parse_properties())

        has_alias = self._match(TokenType.ALIAS)
        if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
            # exp.Properties.Location.POST_ALIAS
            extend_props(self._parse_properties())

        if create_token_type == TokenType.SEQUENCE:
            expression = self._parse_types()
            props = self._parse_properties()
            if props:
                # Fold every parsed SequenceProperties node into a single one
                sequence_props = exp.SequenceProperties()
                options = []
                for prop in props:
                    if isinstance(prop, exp.SequenceProperties):
                        for arg, value in prop.args.items():
                            if arg == "options":
                                options.extend(value)
                            else:
                                sequence_props.set(arg, value)
                        prop.pop()

                if options:
                    sequence_props.set("options", options)

                props.append("expressions", sequence_props)
                extend_props(props)
        else:
            expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fallback to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

        if create_token_type == TokenType.TABLE:
            # exp.Properties.Location.POST_EXPRESSION
            extend_props(self._parse_properties())

            indexes = []
            while True:
                index = self._parse_index()

                # exp.Properties.Location.POST_INDEX
                extend_props(self._parse_properties())
                if not index:
                    break
                else:
                    self._match(TokenType.COMMA)
                    indexes.append(index)
        elif create_token_type == TokenType.VIEW:
            if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                no_schema_binding = True
        elif create_token_type in (TokenType.SINK, TokenType.SOURCE):
            extend_props(self._parse_properties())

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone(this=self._parse_table(schema=True), shallow=shallow, copy=copy)
            )

    # Anything left over other than ")" or "," means the statement wasn't fully understood
    if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
        return self._parse_as_command(start)

    create_kind_text = create_token.text.upper()
    return self.expression(
        exp.Create(
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )
    )

def _parse_sequence_properties(self) -> exp.SequenceProperties | None:
    """
    Parses sequence options such as INCREMENT, MINVALUE, MAXVALUE, START, CACHE and OWNED BY.

    Returns:
        The collected SequenceProperties, or None if no token was consumed.
    """
    seq = exp.SequenceProperties()

    options = []
    index = self._index

    while self._curr:
        self._match(TokenType.COMMA)
        if self._match_text_seq("INCREMENT"):
            self._match_text_seq("BY")
            self._match_text_seq("=")
            seq.set("increment", self._parse_term())
        elif self._match_text_seq("MINVALUE"):
            seq.set("minvalue", self._parse_term())
        elif self._match_text_seq("MAXVALUE"):
            seq.set("maxvalue", self._parse_term())
        elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
            self._match_text_seq("=")
            seq.set("start", self._parse_term())
        elif self._match_text_seq("CACHE"):
            # T-SQL allows empty CACHE which is initialized dynamically
            seq.set("cache", self._parse_number() or True)
        elif self._match_text_seq("OWNED", "BY"):
            # "OWNED BY NONE" is the default
            seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
        else:
            opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
            if opt:
                options.append(opt)
            else:
                break

    seq.set("options", options if options else None)
    return None if self._index == index else seq

def _parse_trigger_events(self) -> list[exp.TriggerEvent]:
    """Parses one or more trigger events separated by OR; UPDATE may carry an OF <columns> list."""
    events = []

    while True:
        event_type = self._match_set(self.TRIGGER_EVENTS) and self._prev.text.upper()

        if not event_type:
            self.raise_error("Expected trigger event (INSERT, UPDATE, DELETE, TRUNCATE)")

        columns = (
            self._parse_csv(self._parse_column)
            if event_type == "UPDATE" and self._match_text_seq("OF")
            else None
        )

        events.append(self.expression(exp.TriggerEvent(this=event_type, columns=columns)))

        if not self._match(TokenType.OR):
            break

    return events

def _parse_trigger_deferrable(
    self,
) -> tuple[str | None, str | None]:
    """
    Parses a trigger's optional deferrable option and INITIALLY clause.

    Returns:
        A (deferrable, initially) pair; either element is None when absent.
        INITIALLY is only looked for when a deferrable option was found.
    """
    deferrable_var = self._parse_var_from_options(
        self.TRIGGER_DEFERRABLE, raise_unmatched=False
    )
    deferrable = deferrable_var.this if deferrable_var else None

    initially = None
    if deferrable and self._match_text_seq("INITIALLY"):
        initially = (
            self._prev.text.upper() if self._match_texts(("IMMEDIATE", "DEFERRED")) else None
        )

    return deferrable, initially

def _parse_trigger_referencing_clause(self, keyword: str) -> exp.Expr | None:
    """Parses `<keyword> TABLE [AS] <name>`, returning the name, or None if `keyword` doesn't match."""
    if not self._match_text_seq(keyword):
        return None
    if not self._match_text_seq("TABLE"):
        self.raise_error(f"Expected TABLE after {keyword} in REFERENCING clause")
    self._match_text_seq("AS")
    return self._parse_id_var()

def _parse_trigger_referencing(self) -> exp.TriggerReferencing | None:
    """
    Parses a REFERENCING clause made of OLD TABLE and/or NEW TABLE aliases.

    Returns:
        The TriggerReferencing expression, or None if REFERENCING is absent.
        Duplicate or missing OLD/NEW clauses are reported through `raise_error`.
    """
    if not self._match_text_seq("REFERENCING"):
        return None

    old_alias = None
    new_alias = None

    while True:
        if alias := self._parse_trigger_referencing_clause("OLD"):
            if old_alias is not None:
                self.raise_error("Duplicate OLD clause in REFERENCING")
            old_alias = alias
        elif alias := self._parse_trigger_referencing_clause("NEW"):
            if new_alias is not None:
                self.raise_error("Duplicate NEW clause in REFERENCING")
            new_alias = alias
        else:
            break

    if old_alias is None and new_alias is None:
        self.raise_error("REFERENCING clause requires at least OLD TABLE or NEW TABLE")

    return self.expression(exp.TriggerReferencing(old=old_alias, new=new_alias))

def _parse_trigger_for_each(self) -> str | None:
    """Parses FOR EACH {ROW | STATEMENT}, returning the matched keyword upper-cased, or None."""
    if not self._match_text_seq("FOR", "EACH"):
        return None

    return self._prev.text.upper() if self._match_texts(("ROW", "STATEMENT")) else None

def _parse_trigger_execute(self) -> exp.TriggerExecute | None:
    """Parses EXECUTE {FUNCTION | PROCEDURE} <call>; returns None if EXECUTE is absent."""
    if not self._match(TokenType.EXECUTE):
        return None

    if not self._match_set((TokenType.FUNCTION, TokenType.PROCEDURE)):
        self.raise_error("Expected FUNCTION or PROCEDURE after EXECUTE")

    func_call = self._parse_column()
    return self.expression(exp.TriggerExecute(this=func_call))

def _parse_property_before(self) -> exp.Expr | list[exp.Expr] | None:
    """
    Parses a property that may be preceded by modifier keywords (NO, DUAL, BEFORE, ...).

    Returns:
        Whatever the matching property parser returns, or None if no property
        parser matches or the parser rejects the collected modifiers.
    """
    # only used for teradata currently
    self._match(TokenType.COMMA)

    # Dict entries are evaluated top to bottom, so the modifiers are consumed in this order
    kwargs = {
        "no": self._match_text_seq("NO"),
        "dual": self._match_text_seq("DUAL"),
        "before": self._match_text_seq("BEFORE"),
        "default": self._match_text_seq("DEFAULT"),
        "local": (self._match_text_seq("LOCAL") and "LOCAL")
        or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
        "after": self._match_text_seq("AFTER"),
        "minimum": self._match_texts(("MIN", "MINIMUM")),
        "maximum": self._match_texts(("MAX", "MAXIMUM")),
    }

    if self._match_texts(self.PROPERTY_PARSERS):
        parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
        try:
            # Only the modifiers that were actually matched are passed on
            return parser(self, **{k: v for k, v in kwargs.items() if v})
        except TypeError:
            self.raise_error(f"Cannot parse property '{self._prev.text}'")

    return None

def _parse_wrapped_properties(self) -> list[exp.Expr | list[exp.Expr]]:
    """Parses a wrapped, comma-separated list of properties."""
    return self._parse_wrapped_csv(self._parse_property)

def _parse_property(self) -> exp.Expr | list[exp.Expr] | None:
    """
    Parses a single property.

    Tries registered property parsers first (optionally prefixed by DEFAULT), then
    a few multi-word properties, then sequence properties, and finally a generic
    `key = value` assignment.

    Returns:
        The parsed property (or properties), or None if nothing matches, in which
        case the cursor is restored.
    """
    if self._match_texts(self.PROPERTY_PARSERS):
        return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

    if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
        return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

    if self._match_text_seq("COMPOUND", "SORTKEY"):
        return self._parse_sortkey(compound=True)

    if self._match_text_seq("PARAMETER", "STYLE", "PANDAS"):
        return self.expression(exp.ParameterStyleProperty(this="PANDAS"))

    index = self._index

    seq_props = self._parse_sequence_properties()
    if seq_props:
        return seq_props

    self._retreat(index)
    key = self._parse_column()

    if not self._match(TokenType.EQ):
        self._retreat(index)
        return None

    # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
    if isinstance(key, exp.Column):
        key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

    value = self._parse_bitwise() or self._parse_var(any_token=True)

    # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
    if isinstance(value, exp.Column):
        value = exp.var(value.name)

    return self.expression(exp.Property(this=key, value=value))

def _parse_stored(self) -> exp.FileFormatProperty | exp.StorageHandlerProperty:
    """
    Parses what follows STORED: either BY <handler> or [AS] <file format>.

    The file format is either an INPUTFORMAT / OUTPUTFORMAT pair of strings or a
    single var, string, number or identifier.
    """
    if self._match_text_seq("BY"):
        return self.expression(exp.StorageHandlerProperty(this=self._parse_var_or_string()))

    self._match(TokenType.ALIAS)
    input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
    output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

    return self.expression(
        exp.FileFormatProperty(
            this=(
                self.expression(
                    exp.InputOutputFormat(
                        input_format=input_format, output_format=output_format
                    )
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
            hive_format=True,
        )
    )

def _parse_unquoted_field(self) -> exp.Expr | None:
    """Parses a field, converting an unquoted identifier into a Var."""
    field = self._parse_field()
    if isinstance(field, exp.Identifier) and not field.quoted:
        field = exp.var(field)

    return field

def _parse_property_assignment(self, exp_class: type[E], **kwargs: t.Any) -> E:
    """
    Parses `[=] [AS] <field>` into an instance of `exp_class`.

    Args:
        exp_class: The expression class to instantiate with the parsed field as `this`.
        **kwargs: Extra arguments forwarded to `exp_class`.
    """
    self._match(TokenType.EQ)
    self._match(TokenType.ALIAS)

    return self.expression(exp_class(this=self._parse_unquoted_field(), **kwargs))

def _parse_properties(self, before: bool | None = None) -> exp.Properties | None:
    """
    Parses consecutive properties until none matches.

    Args:
        before: Whether to use `_parse_property_before` instead of `_parse_property`.

    Returns:
        A Properties expression, or None if no property was parsed.
    """
    properties = []
    while True:
        if before:
            prop = self._parse_property_before()
        else:
            prop = self._parse_property()
        if not prop:
            break
        # A property parser may return a single expression or a list of them
        for p in ensure_list(prop):
            properties.append(p)

    if properties:
        return self.expression(exp.Properties(expressions=properties))

    return None

def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
    """Builds a FallbackProperty, recording whether PROTECTION follows."""
    return self.expression(
        exp.FallbackProperty(no=no, protection=self._match_text_seq("PROTECTION"))
    )

def _parse_sql_security(self) -> exp.SqlSecurityProperty:
    """Builds a SqlSecurityProperty from one of `self.SECURITY_PROPERTY_KEYWORDS`, upper-cased."""
    return self.expression(
        exp.SqlSecurityProperty(
            this=self._match_texts(self.SECURITY_PROPERTY_KEYWORDS) and self._prev.text.upper()
        )
    )

def _parse_settings_property(self) -> exp.SettingsProperty:
    """Parses a comma-separated list of assignments into a SettingsProperty."""
    return self.expression(
        exp.SettingsProperty(expressions=self._parse_csv(self._parse_assignment))
    )

def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
    """
    Decides what a VOLATILE keyword means based on a preceding token.

    Returns:
        A VolatileProperty if the token two positions behind the cursor is one of
        `self.PRE_VOLATILE_TOKENS`, else a StabilityProperty for "VOLATILE".
    """
    # NOTE(review): index - 2 is presumably the token just before the VOLATILE keyword
    # that has already been consumed - confirm against the callers
    if self._index >= 2:
        pre_volatile_token = self._tokens[self._index - 2]
    else:
        pre_volatile_token = None

    if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
        return exp.VolatileProperty()

    return self.expression(exp.StabilityProperty(this=exp.Literal.string("VOLATILE")))

def _parse_retention_period(self) -> exp.Var:
    """Parses an optional number followed by a unit into a single Var such as `<number> <unit>`."""
    # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
    number = self._parse_number()
    number_str = f"{number} " if number else ""
    unit = self._parse_var(any_token=True)
    return exp.var(f"{number_str}{unit}")

def _parse_system_versioning_property(
    self, with_: bool = False
) -> exp.WithSystemVersioningProperty:
    self._match(TokenType.EQ)
    prop = self.expression(exp.WithSystemVersioningProperty(on=True, with_=with_))

    if self._match_text_seq("OFF"):
        prop.set("on", False)
        return prop

    self._match(TokenType.ON)
    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("HISTORY_TABLE", "="):
                prop.set("this",
self._parse_table_parts()) 2892 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2893 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2894 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2895 prop.set("retention_period", self._parse_retention_period()) 2896 2897 self._match(TokenType.COMMA) 2898 2899 return prop 2900 2901 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2902 self._match(TokenType.EQ) 2903 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2904 prop = self.expression(exp.DataDeletionProperty(on=on)) 2905 2906 if self._match(TokenType.L_PAREN): 2907 while self._curr and not self._match(TokenType.R_PAREN): 2908 if self._match_text_seq("FILTER_COLUMN", "="): 2909 prop.set("filter_column", self._parse_column()) 2910 elif self._match_text_seq("RETENTION_PERIOD", "="): 2911 prop.set("retention_period", self._parse_retention_period()) 2912 2913 self._match(TokenType.COMMA) 2914 2915 return prop 2916 2917 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2918 kind = "HASH" 2919 expressions: list[exp.Expr] | None = None 2920 if self._match_text_seq("BY", "HASH"): 2921 expressions = self._parse_wrapped_csv(self._parse_id_var) 2922 elif self._match_text_seq("BY", "RANDOM"): 2923 kind = "RANDOM" 2924 2925 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2926 buckets: exp.Expr | None = None 2927 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2928 buckets = self._parse_number() 2929 2930 return self.expression( 2931 exp.DistributedByProperty( 2932 expressions=expressions, kind=kind, buckets=buckets, order=self._parse_order() 2933 ) 2934 ) 2935 2936 def _parse_composite_key_property(self, expr_type: type[E]) -> E: 2937 self._match_text_seq("KEY") 2938 expressions = self._parse_wrapped_id_vars() 2939 return self.expression(expr_type(expressions=expressions)) 2940 2941 def _parse_with_property(self) -> exp.Expr | 
None | list[exp.Expr]: 2942 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2943 prop = self._parse_system_versioning_property(with_=True) 2944 self._match_r_paren() 2945 return prop 2946 2947 if self._match(TokenType.L_PAREN, advance=False): 2948 result: list[exp.Expr] = [] 2949 for i in self._parse_wrapped_properties(): 2950 result.extend(i) if isinstance(i, list) else result.append(i) 2951 return result 2952 2953 if self._match_text_seq("JOURNAL"): 2954 return self._parse_withjournaltable() 2955 2956 if self._match_texts(self.VIEW_ATTRIBUTES): 2957 return self.expression(exp.ViewAttributeProperty(this=self._prev.text.upper())) 2958 2959 if self._match_text_seq("DATA"): 2960 return self._parse_withdata(no=False) 2961 elif self._match_text_seq("NO", "DATA"): 2962 return self._parse_withdata(no=True) 2963 2964 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2965 return self._parse_serde_properties(with_=True) 2966 2967 if self._match(TokenType.SCHEMA): 2968 return self.expression( 2969 exp.WithSchemaBindingProperty( 2970 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS) 2971 ) 2972 ) 2973 2974 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2975 return self.expression( 2976 exp.WithProcedureOptions(expressions=self._parse_csv(self._parse_procedure_option)) 2977 ) 2978 2979 if not self._next: 2980 return None 2981 2982 return self._parse_withisolatedloading() 2983 2984 def _parse_procedure_option(self) -> exp.Expr | None: 2985 if self._match_text_seq("EXECUTE", "AS"): 2986 return self.expression( 2987 exp.ExecuteAsProperty( 2988 this=self._parse_var_from_options( 2989 self.EXECUTE_AS_OPTIONS, raise_unmatched=False 2990 ) 2991 or self._parse_string() 2992 ) 2993 ) 2994 2995 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2996 2997 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2998 def _parse_definer(self) -> exp.DefinerProperty | None: 2999 self._match(TokenType.EQ) 3000 3001 user = self._parse_id_var() 
3002 self._match(TokenType.PARAMETER) 3003 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 3004 3005 if not user or not host: 3006 return None 3007 3008 return exp.DefinerProperty(this=f"{user}@{host}") 3009 3010 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 3011 self._match(TokenType.TABLE) 3012 self._match(TokenType.EQ) 3013 return self.expression(exp.WithJournalTableProperty(this=self._parse_table_parts())) 3014 3015 def _parse_log(self, no: bool = False) -> exp.LogProperty: 3016 return self.expression(exp.LogProperty(no=no)) 3017 3018 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 3019 return self.expression(exp.JournalProperty(**kwargs)) 3020 3021 def _parse_checksum(self) -> exp.ChecksumProperty: 3022 self._match(TokenType.EQ) 3023 3024 on = None 3025 if self._match(TokenType.ON): 3026 on = True 3027 elif self._match_text_seq("OFF"): 3028 on = False 3029 3030 return self.expression(exp.ChecksumProperty(on=on, default=self._match(TokenType.DEFAULT))) 3031 3032 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 3033 return self.expression( 3034 exp.Cluster( 3035 expressions=( 3036 self._parse_wrapped_csv(self._parse_ordered) 3037 if wrapped 3038 else self._parse_csv(self._parse_ordered) 3039 ) 3040 ) 3041 ) 3042 3043 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 3044 self._match_text_seq("BY") 3045 3046 self._match_l_paren() 3047 expressions = self._parse_csv(self._parse_column) 3048 self._match_r_paren() 3049 3050 if self._match_text_seq("SORTED", "BY"): 3051 self._match_l_paren() 3052 sorted_by = self._parse_csv(self._parse_ordered) 3053 self._match_r_paren() 3054 else: 3055 sorted_by = None 3056 3057 self._match(TokenType.INTO) 3058 buckets = self._parse_number() 3059 self._match_text_seq("BUCKETS") 3060 3061 return self.expression( 3062 exp.ClusteredByProperty(expressions=expressions, sorted_by=sorted_by, buckets=buckets) 3063 ) 3064 3065 def _parse_copy_property(self) 
-> exp.CopyGrantsProperty | None: 3066 if not self._match_text_seq("GRANTS"): 3067 self._retreat(self._index - 1) 3068 return None 3069 3070 return self.expression(exp.CopyGrantsProperty()) 3071 3072 def _parse_freespace(self) -> exp.FreespaceProperty: 3073 self._match(TokenType.EQ) 3074 return self.expression( 3075 exp.FreespaceProperty(this=self._parse_number(), percent=self._match(TokenType.PERCENT)) 3076 ) 3077 3078 def _parse_mergeblockratio( 3079 self, no: bool = False, default: bool = False 3080 ) -> exp.MergeBlockRatioProperty: 3081 if self._match(TokenType.EQ): 3082 return self.expression( 3083 exp.MergeBlockRatioProperty( 3084 this=self._parse_number(), percent=self._match(TokenType.PERCENT) 3085 ) 3086 ) 3087 3088 return self.expression(exp.MergeBlockRatioProperty(no=no, default=default)) 3089 3090 def _parse_datablocksize( 3091 self, 3092 default: bool | None = None, 3093 minimum: bool | None = None, 3094 maximum: bool | None = None, 3095 ) -> exp.DataBlocksizeProperty: 3096 self._match(TokenType.EQ) 3097 size = self._parse_number() 3098 3099 units = None 3100 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 3101 units = self._prev.text 3102 3103 return self.expression( 3104 exp.DataBlocksizeProperty( 3105 size=size, units=units, default=default, minimum=minimum, maximum=maximum 3106 ) 3107 ) 3108 3109 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 3110 self._match(TokenType.EQ) 3111 always = self._match_text_seq("ALWAYS") 3112 manual = self._match_text_seq("MANUAL") 3113 never = self._match_text_seq("NEVER") 3114 default = self._match_text_seq("DEFAULT") 3115 3116 autotemp = None 3117 if self._match_text_seq("AUTOTEMP"): 3118 autotemp = self._parse_schema() 3119 3120 return self.expression( 3121 exp.BlockCompressionProperty( 3122 always=always, manual=manual, never=never, default=default, autotemp=autotemp 3123 ) 3124 ) 3125 3126 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty | None: 3127 index = 
self._index 3128 no = self._match_text_seq("NO") 3129 concurrent = self._match_text_seq("CONCURRENT") 3130 3131 if not self._match_text_seq("ISOLATED", "LOADING"): 3132 self._retreat(index) 3133 return None 3134 3135 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 3136 return self.expression( 3137 exp.IsolatedLoadingProperty(no=no, concurrent=concurrent, target=target) 3138 ) 3139 3140 def _parse_locking(self) -> exp.LockingProperty: 3141 if self._match(TokenType.TABLE): 3142 kind = "TABLE" 3143 elif self._match(TokenType.VIEW): 3144 kind = "VIEW" 3145 elif self._match(TokenType.ROW): 3146 kind = "ROW" 3147 elif self._match_text_seq("DATABASE"): 3148 kind = "DATABASE" 3149 else: 3150 kind = None 3151 3152 if kind in ("DATABASE", "TABLE", "VIEW"): 3153 this = self._parse_table_parts() 3154 else: 3155 this = None 3156 3157 if self._match(TokenType.FOR): 3158 for_or_in = "FOR" 3159 elif self._match(TokenType.IN): 3160 for_or_in = "IN" 3161 else: 3162 for_or_in = None 3163 3164 if self._match_text_seq("ACCESS"): 3165 lock_type = "ACCESS" 3166 elif self._match_texts(("EXCL", "EXCLUSIVE")): 3167 lock_type = "EXCLUSIVE" 3168 elif self._match_text_seq("SHARE"): 3169 lock_type = "SHARE" 3170 elif self._match_text_seq("READ"): 3171 lock_type = "READ" 3172 elif self._match_text_seq("WRITE"): 3173 lock_type = "WRITE" 3174 elif self._match_text_seq("CHECKSUM"): 3175 lock_type = "CHECKSUM" 3176 else: 3177 lock_type = None 3178 3179 override = self._match_text_seq("OVERRIDE") 3180 3181 return self.expression( 3182 exp.LockingProperty( 3183 this=this, kind=kind, for_or_in=for_or_in, lock_type=lock_type, override=override 3184 ) 3185 ) 3186 3187 def _parse_partition_by(self) -> list[exp.Expr]: 3188 if self._match(TokenType.PARTITION_BY): 3189 return self._parse_csv(self._parse_disjunction) 3190 return [] 3191 3192 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 3193 def _parse_partition_bound_expr() -> exp.Expr | None: 
3194 if self._match_text_seq("MINVALUE"): 3195 return exp.var("MINVALUE") 3196 if self._match_text_seq("MAXVALUE"): 3197 return exp.var("MAXVALUE") 3198 return self._parse_bitwise() 3199 3200 this: exp.Expr | list[exp.Expr] | None = None 3201 expression = None 3202 from_expressions = None 3203 to_expressions = None 3204 3205 if self._match(TokenType.IN): 3206 this = self._parse_wrapped_csv(self._parse_bitwise) 3207 elif self._match(TokenType.FROM): 3208 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 3209 self._match_text_seq("TO") 3210 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 3211 elif self._match_text_seq("WITH", "(", "MODULUS"): 3212 this = self._parse_number() 3213 self._match_text_seq(",", "REMAINDER") 3214 expression = self._parse_number() 3215 self._match_r_paren() 3216 else: 3217 self.raise_error("Failed to parse partition bound spec.") 3218 3219 return self.expression( 3220 exp.PartitionBoundSpec( 3221 this=this, 3222 expression=expression, 3223 from_expressions=from_expressions, 3224 to_expressions=to_expressions, 3225 ) 3226 ) 3227 3228 # https://www.postgresql.org/docs/current/sql-createtable.html 3229 def _parse_partitioned_of(self) -> exp.PartitionedOfProperty | None: 3230 if not self._match_text_seq("OF"): 3231 self._retreat(self._index - 1) 3232 return None 3233 3234 this = self._parse_table(schema=True) 3235 3236 if self._match(TokenType.DEFAULT): 3237 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 3238 elif self._match_text_seq("FOR", "VALUES"): 3239 expression = self._parse_partition_bound_spec() 3240 else: 3241 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 3242 3243 return self.expression(exp.PartitionedOfProperty(this=this, expression=expression)) 3244 3245 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 3246 self._match(TokenType.EQ) 3247 return self.expression( 3248 exp.PartitionedByProperty( 3249 this=self._parse_schema() or 
self._parse_bracket(self._parse_field()) 3250 ) 3251 ) 3252 3253 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 3254 if self._match_text_seq("AND", "STATISTICS"): 3255 statistics = True 3256 elif self._match_text_seq("AND", "NO", "STATISTICS"): 3257 statistics = False 3258 else: 3259 statistics = None 3260 3261 return self.expression(exp.WithDataProperty(no=no, statistics=statistics)) 3262 3263 def _parse_contains_property(self) -> exp.SqlReadWriteProperty | None: 3264 if self._match_text_seq("SQL"): 3265 return self.expression(exp.SqlReadWriteProperty(this="CONTAINS SQL")) 3266 return None 3267 3268 def _parse_modifies_property(self) -> exp.SqlReadWriteProperty | None: 3269 if self._match_text_seq("SQL", "DATA"): 3270 return self.expression(exp.SqlReadWriteProperty(this="MODIFIES SQL DATA")) 3271 return None 3272 3273 def _parse_no_property(self) -> exp.Expr | None: 3274 if self._match_text_seq("PRIMARY", "INDEX"): 3275 return exp.NoPrimaryIndexProperty() 3276 if self._match_text_seq("SQL"): 3277 return self.expression(exp.SqlReadWriteProperty(this="NO SQL")) 3278 return None 3279 3280 def _parse_on_property(self) -> exp.Expr | None: 3281 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 3282 return exp.OnCommitProperty() 3283 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 3284 return exp.OnCommitProperty(delete=True) 3285 return self.expression(exp.OnProperty(this=self._parse_schema(self._parse_id_var()))) 3286 3287 def _parse_reads_property(self) -> exp.SqlReadWriteProperty | None: 3288 if self._match_text_seq("SQL", "DATA"): 3289 return self.expression(exp.SqlReadWriteProperty(this="READS SQL DATA")) 3290 return None 3291 3292 def _parse_distkey(self) -> exp.DistKeyProperty: 3293 return self.expression(exp.DistKeyProperty(this=self._parse_wrapped(self._parse_id_var))) 3294 3295 def _parse_create_like(self) -> exp.LikeProperty | None: 3296 table = self._parse_table(schema=True) 3297 3298 options = [] 3299 while 
self._match_texts(("INCLUDING", "EXCLUDING")): 3300 this = self._prev.text.upper() 3301 3302 id_var = self._parse_id_var() 3303 if not id_var: 3304 return None 3305 3306 options.append( 3307 self.expression(exp.Property(this=this, value=exp.var(id_var.this.upper()))) 3308 ) 3309 3310 return self.expression(exp.LikeProperty(this=table, expressions=options)) 3311 3312 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 3313 return self.expression( 3314 exp.SortKeyProperty(this=self._parse_wrapped_id_vars(), compound=compound) 3315 ) 3316 3317 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 3318 self._match(TokenType.EQ) 3319 return self.expression( 3320 exp.CharacterSetProperty(this=self._parse_var_or_string(), default=default) 3321 ) 3322 3323 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 3324 self._match_text_seq("WITH", "CONNECTION") 3325 return self.expression( 3326 exp.RemoteWithConnectionModelProperty(this=self._parse_table_parts()) 3327 ) 3328 3329 def _parse_returns(self) -> exp.ReturnsProperty: 3330 value: exp.Expr | None 3331 null = None 3332 is_table = self._match(TokenType.TABLE) 3333 3334 if is_table: 3335 if self._match(TokenType.LT): 3336 value = self.expression( 3337 exp.Schema(this="TABLE", expressions=self._parse_csv(self._parse_struct_types)) 3338 ) 3339 if not self._match(TokenType.GT): 3340 self.raise_error("Expecting >") 3341 else: 3342 value = self._parse_schema(exp.var("TABLE")) 3343 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 3344 null = True 3345 value = None 3346 else: 3347 value = self._parse_types() 3348 3349 return self.expression(exp.ReturnsProperty(this=value, is_table=is_table, null=null)) 3350 3351 def _parse_describe(self) -> exp.Describe: 3352 kind = self._prev.text if self._match_set(self.CREATABLES) else None 3353 style: str | None = ( 3354 self._prev.text.upper() if self._match_texts(self.DESCRIBE_STYLES) else None 3355 ) 
3356 if self._match(TokenType.DOT): 3357 style = None 3358 self._retreat(self._index - 2) 3359 3360 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 3361 3362 if self._match_set(self.STATEMENT_PARSERS, advance=False): 3363 this = self._parse_statement() 3364 else: 3365 this = self._parse_table(schema=True) 3366 3367 properties = self._parse_properties() 3368 expressions = properties.expressions if properties else None 3369 partition = self._parse_partition() 3370 return self.expression( 3371 exp.Describe( 3372 this=this, 3373 style=style, 3374 kind=kind, 3375 expressions=expressions, 3376 partition=partition, 3377 format=format, 3378 as_json=self._match_text_seq("AS", "JSON"), 3379 ) 3380 ) 3381 3382 def _parse_multitable_inserts(self, comments: list[str] | None) -> exp.MultitableInserts: 3383 kind = self._prev.text.upper() 3384 expressions = [] 3385 3386 def parse_conditional_insert() -> exp.ConditionalInsert | None: 3387 if self._match(TokenType.WHEN): 3388 expression = self._parse_disjunction() 3389 self._match(TokenType.THEN) 3390 else: 3391 expression = None 3392 3393 else_ = self._match(TokenType.ELSE) 3394 3395 if not self._match(TokenType.INTO): 3396 return None 3397 3398 return self.expression( 3399 exp.ConditionalInsert( 3400 this=self.expression( 3401 exp.Insert( 3402 this=self._parse_table(schema=True), 3403 expression=self._parse_derived_table_values(), 3404 ) 3405 ), 3406 expression=expression, 3407 else_=else_, 3408 ) 3409 ) 3410 3411 expression = parse_conditional_insert() 3412 while expression is not None: 3413 expressions.append(expression) 3414 expression = parse_conditional_insert() 3415 3416 return self.expression( 3417 exp.MultitableInserts(kind=kind, expressions=expressions, source=self._parse_table()), 3418 comments=comments, 3419 ) 3420 3421 def _parse_insert(self) -> exp.Insert | exp.MultitableInserts: 3422 comments: list[str] = [] 3423 hint = self._parse_hint() 3424 overwrite = 
self._match(TokenType.OVERWRITE) 3425 ignore = self._match(TokenType.IGNORE) 3426 local = self._match_text_seq("LOCAL") 3427 alternative = None 3428 is_function = None 3429 3430 if self._match_text_seq("DIRECTORY"): 3431 this: exp.Expr | None = self.expression( 3432 exp.Directory( 3433 this=self._parse_var_or_string(), 3434 local=local, 3435 row_format=self._parse_row_format(match_row=True), 3436 ) 3437 ) 3438 else: 3439 if self._match_set((TokenType.FIRST, TokenType.ALL)): 3440 comments += ensure_list(self._prev_comments) 3441 return self._parse_multitable_inserts(comments) 3442 3443 if self._match(TokenType.OR): 3444 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 3445 3446 self._match(TokenType.INTO) 3447 comments += ensure_list(self._prev_comments) 3448 self._match(TokenType.TABLE) 3449 is_function = self._match(TokenType.FUNCTION) 3450 3451 this = self._parse_function() if is_function else self._parse_insert_table() 3452 3453 returning = self._parse_returning() # TSQL allows RETURNING before source 3454 3455 return self.expression( 3456 exp.Insert( 3457 hint=hint, 3458 is_function=is_function, 3459 this=this, 3460 stored=self._match_text_seq("STORED") and self._parse_stored(), 3461 by_name=self._match_text_seq("BY", "NAME"), 3462 exists=self._parse_exists(), 3463 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 3464 and self._parse_disjunction(), 3465 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 3466 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 3467 default=self._match_text_seq("DEFAULT", "VALUES"), 3468 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 3469 conflict=self._parse_on_conflict(), 3470 returning=returning or self._parse_returning(), 3471 overwrite=overwrite, 3472 alternative=alternative, 3473 ignore=ignore, 3474 source=self._match(TokenType.TABLE) and self._parse_table(), 3475 ), 3476 comments=comments, 3477 ) 3478 
3479 def _parse_insert_table(self) -> exp.Expr | None: 3480 this = self._parse_table(schema=True, parse_partition=True) 3481 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 3482 this.set("alias", self._parse_table_alias()) 3483 return this 3484 3485 def _parse_kill(self) -> exp.Kill: 3486 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 3487 3488 return self.expression(exp.Kill(this=self._parse_primary(), kind=kind)) 3489 3490 def _parse_on_conflict(self) -> exp.OnConflict | None: 3491 conflict = self._match_text_seq("ON", "CONFLICT") 3492 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 3493 3494 if not conflict and not duplicate: 3495 return None 3496 3497 conflict_keys = None 3498 constraint = None 3499 3500 if conflict: 3501 if self._match_text_seq("ON", "CONSTRAINT"): 3502 constraint = self._parse_id_var() 3503 elif self._match(TokenType.L_PAREN): 3504 conflict_keys = self._parse_csv(self._parse_id_var) 3505 self._match_r_paren() 3506 3507 index_predicate = self._parse_where() 3508 3509 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3510 if self._prev.token_type == TokenType.UPDATE: 3511 self._match(TokenType.SET) 3512 expressions = self._parse_csv(self._parse_equality) 3513 else: 3514 expressions = None 3515 3516 return self.expression( 3517 exp.OnConflict( 3518 duplicate=duplicate, 3519 expressions=expressions, 3520 action=action, 3521 conflict_keys=conflict_keys, 3522 index_predicate=index_predicate, 3523 constraint=constraint, 3524 where=self._parse_where(), 3525 ) 3526 ) 3527 3528 def _parse_returning(self) -> exp.Returning | None: 3529 if not self._match(TokenType.RETURNING): 3530 return None 3531 return self.expression( 3532 exp.Returning( 3533 expressions=self._parse_csv(self._parse_expression), 3534 into=self._match(TokenType.INTO) and self._parse_table_part(), 3535 ) 3536 ) 3537 3538 def _parse_row(self) -> exp.RowFormatSerdeProperty | 
exp.RowFormatDelimitedProperty | None: 3539 if not self._match(TokenType.FORMAT): 3540 return None 3541 return self._parse_row_format() 3542 3543 def _parse_serde_properties(self, with_: bool = False) -> exp.SerdeProperties | None: 3544 index = self._index 3545 with_ = with_ or self._match_text_seq("WITH") 3546 3547 if not self._match(TokenType.SERDE_PROPERTIES): 3548 self._retreat(index) 3549 return None 3550 return self.expression( 3551 exp.SerdeProperties(expressions=self._parse_wrapped_properties(), with_=with_) 3552 ) 3553 3554 def _parse_row_format( 3555 self, match_row: bool = False 3556 ) -> exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty | None: 3557 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3558 return None 3559 3560 if self._match_text_seq("SERDE"): 3561 this = self._parse_string() 3562 3563 serde_properties = self._parse_serde_properties() 3564 3565 return self.expression( 3566 exp.RowFormatSerdeProperty(this=this, serde_properties=serde_properties) 3567 ) 3568 3569 self._match_text_seq("DELIMITED") 3570 3571 kwargs = {} 3572 3573 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3574 kwargs["fields"] = self._parse_string() 3575 if self._match_text_seq("ESCAPED", "BY"): 3576 kwargs["escaped"] = self._parse_string() 3577 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3578 kwargs["collection_items"] = self._parse_string() 3579 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3580 kwargs["map_keys"] = self._parse_string() 3581 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3582 kwargs["lines"] = self._parse_string() 3583 if self._match_text_seq("NULL", "DEFINED", "AS"): 3584 kwargs["null"] = self._parse_string() 3585 3586 return self.expression(exp.RowFormatDelimitedProperty(**kwargs)) # type: ignore 3587 3588 def _parse_load(self) -> exp.LoadData | exp.Command: 3589 if self._match_text_seq("DATA"): 3590 local = self._match_text_seq("LOCAL") 3591 
self._match_text_seq("INPATH") 3592 inpath = self._parse_string() 3593 overwrite = self._match(TokenType.OVERWRITE) 3594 self._match_pair(TokenType.INTO, TokenType.TABLE) 3595 3596 return self.expression( 3597 exp.LoadData( 3598 this=self._parse_table(schema=True), 3599 local=local, 3600 overwrite=overwrite, 3601 inpath=inpath, 3602 files=self._match_text_seq("FROM", "FILES") 3603 and exp.Properties(expressions=self._parse_wrapped_properties()), 3604 partition=self._parse_partition(), 3605 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3606 serde=self._match_text_seq("SERDE") and self._parse_string(), 3607 ) 3608 ) 3609 return self._parse_as_command(self._prev) 3610 3611 def _parse_delete(self) -> exp.Delete: 3612 hint = self._parse_hint() 3613 3614 # This handles MySQL's "Multiple-Table Syntax" 3615 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3616 tables = None 3617 if not self._match(TokenType.FROM, advance=False): 3618 tables = self._parse_csv(self._parse_table) or None 3619 3620 returning = self._parse_returning() 3621 3622 return self.expression( 3623 exp.Delete( 3624 hint=hint, 3625 tables=tables, 3626 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3627 using=self._match(TokenType.USING) 3628 and self._parse_csv(lambda: self._parse_table(joins=True)), 3629 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3630 where=self._parse_where(), 3631 returning=returning or self._parse_returning(), 3632 order=self._parse_order(), 3633 limit=self._parse_limit(), 3634 ) 3635 ) 3636 3637 def _parse_update(self) -> exp.Update: 3638 hint = self._parse_hint() 3639 kwargs: dict[str, object] = { 3640 "hint": hint, 3641 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3642 } 3643 while self._curr: 3644 if self._match(TokenType.SET): 3645 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3646 elif self._match(TokenType.RETURNING, advance=False): 3647 
kwargs["returning"] = self._parse_returning() 3648 elif self._match(TokenType.FROM, advance=False): 3649 from_ = self._parse_from(joins=True) 3650 table = from_.this if from_ else None 3651 if isinstance(table, exp.Subquery) and self._match(TokenType.JOIN, advance=False): 3652 table.set("joins", list(self._parse_joins()) or None) 3653 3654 kwargs["from_"] = from_ 3655 elif self._match(TokenType.WHERE, advance=False): 3656 kwargs["where"] = self._parse_where() 3657 elif self._match(TokenType.ORDER_BY, advance=False): 3658 kwargs["order"] = self._parse_order() 3659 elif self._match(TokenType.LIMIT, advance=False): 3660 kwargs["limit"] = self._parse_limit() 3661 else: 3662 break 3663 3664 return self.expression(exp.Update(**kwargs)) 3665 3666 def _parse_use(self) -> exp.Use: 3667 return self.expression( 3668 exp.Use( 3669 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3670 this=self._parse_table(schema=False), 3671 ) 3672 ) 3673 3674 def _parse_uncache(self) -> exp.Uncache: 3675 if not self._match(TokenType.TABLE): 3676 self.raise_error("Expecting TABLE after UNCACHE") 3677 3678 return self.expression( 3679 exp.Uncache(exists=self._parse_exists(), this=self._parse_table(schema=True)) 3680 ) 3681 3682 def _parse_cache(self) -> exp.Cache: 3683 lazy = self._match_text_seq("LAZY") 3684 self._match(TokenType.TABLE) 3685 table = self._parse_table(schema=True) 3686 3687 options = [] 3688 if self._match_text_seq("OPTIONS"): 3689 self._match_l_paren() 3690 k = self._parse_string() 3691 self._match(TokenType.EQ) 3692 v = self._parse_string() 3693 options = [k, v] 3694 self._match_r_paren() 3695 3696 self._match(TokenType.ALIAS) 3697 return self.expression( 3698 exp.Cache( 3699 this=table, lazy=lazy, options=options, expression=self._parse_select(nested=True) 3700 ) 3701 ) 3702 3703 def _parse_partition(self) -> exp.Partition | None: 3704 if not self._match_texts(self.PARTITION_KEYWORDS): 3705 return None 3706 3707 return self.expression( 3708 
exp.Partition( 3709 subpartition=self._prev.text.upper() == "SUBPARTITION", 3710 expressions=self._parse_wrapped_csv(self._parse_disjunction), 3711 ) 3712 ) 3713 3714 def _parse_value(self, values: bool = True) -> exp.Tuple | None: 3715 def _parse_value_expression() -> exp.Expr | None: 3716 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3717 return exp.var(self._prev.text.upper()) 3718 return self._parse_expression() 3719 3720 if self._match(TokenType.L_PAREN): 3721 expressions = self._parse_csv(_parse_value_expression) 3722 self._match_r_paren() 3723 return self.expression(exp.Tuple(expressions=expressions)) 3724 3725 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3726 expression = self._parse_expression() 3727 if expression: 3728 return self.expression(exp.Tuple(expressions=[expression])) 3729 return None 3730 3731 def _parse_projections( 3732 self, 3733 ) -> tuple[list[exp.Expr], list[exp.Expr] | None]: 3734 return self._parse_expressions(), None 3735 3736 def _parse_wrapped_select(self, table: bool = False) -> exp.Expr | None: 3737 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3738 this: exp.Expr | None = self._parse_simplified_pivot( 3739 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3740 ) 3741 elif self._match(TokenType.FROM): 3742 from_ = self._parse_from(joins=True, skip_from_token=True, consume_pipe=True) 3743 # Support parentheses for duckdb FROM-first syntax 3744 select = self._parse_select(from_=from_) 3745 if select: 3746 if not select.args.get("from_"): 3747 select.set("from_", from_) 3748 this = select 3749 else: 3750 this = exp.select("*").from_(t.cast(exp.From, from_)) 3751 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3752 else: 3753 this = ( 3754 self._parse_table(consume_pipe=True) 3755 if table 3756 else self._parse_select(nested=True, parse_set_operation=False) 3757 ) 3758 3759 # Transform exp.Values into a exp.Table to pass through 
parse_query_modifiers 3760 # in case a modifier (e.g. join) is following 3761 if table and isinstance(this, exp.Values) and this.alias: 3762 alias = this.args["alias"].pop() 3763 this = exp.Table(this=this, alias=alias) 3764 3765 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3766 3767 return this 3768 3769 def _parse_select( 3770 self, 3771 nested: bool = False, 3772 table: bool = False, 3773 parse_subquery_alias: bool = True, 3774 parse_set_operation: bool = True, 3775 consume_pipe: bool = True, 3776 from_: exp.From | None = None, 3777 ) -> exp.Expr | None: 3778 query = self._parse_select_query( 3779 nested=nested, 3780 table=table, 3781 parse_subquery_alias=parse_subquery_alias, 3782 parse_set_operation=parse_set_operation, 3783 ) 3784 3785 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3786 if not query and from_: 3787 query = exp.select("*").from_(from_) 3788 if isinstance(query, exp.Query): 3789 query = self._parse_pipe_syntax_query(query) 3790 query = query.subquery(copy=False) if query and table else query 3791 3792 return query 3793 3794 def _parse_select_query( 3795 self, 3796 nested: bool = False, 3797 table: bool = False, 3798 parse_subquery_alias: bool = True, 3799 parse_set_operation: bool = True, 3800 ) -> exp.Expr | None: 3801 cte = self._parse_with() 3802 3803 if cte: 3804 this = self._parse_statement() 3805 3806 if not this: 3807 self.raise_error("Failed to parse any statement following CTE") 3808 return cte 3809 3810 while isinstance(this, exp.Subquery) and this.is_wrapper: 3811 this = this.this 3812 3813 assert this is not None 3814 if "with_" in this.arg_types: 3815 this.set("with_", cte) 3816 else: 3817 self.raise_error(f"{this.key} does not support CTE") 3818 this = cte 3819 3820 return this 3821 3822 # duckdb supports leading with FROM x 3823 from_ = ( 3824 self._parse_from(joins=True, consume_pipe=True) 3825 if self._match(TokenType.FROM, advance=False) 3826 else None 3827 ) 3828 3829 if 
self._match(TokenType.SELECT): 3830 comments = self._prev_comments 3831 3832 hint = self._parse_hint() 3833 3834 if self._next and not self._next.token_type == TokenType.DOT: 3835 all_ = self._match(TokenType.ALL) 3836 matched_distinct = self._match_set(self.DISTINCT_TOKENS) 3837 else: 3838 all_, matched_distinct = None, False 3839 3840 kind = ( 3841 self._prev.text.upper() 3842 if self._match(TokenType.ALIAS) and self._match_texts(("STRUCT", "VALUE")) 3843 else None 3844 ) 3845 3846 distinct: exp.Expr | None = ( 3847 self.expression( 3848 exp.Distinct( 3849 on=self._parse_value(values=False) if self._match(TokenType.ON) else None 3850 ) 3851 ) 3852 if matched_distinct 3853 else None 3854 ) 3855 3856 operation_modifiers = [] 3857 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3858 operation_modifiers.append(exp.var(self._prev.text.upper())) 3859 3860 limit = self._parse_limit(top=True) 3861 3862 # Some dialects (e.g. Redshift, T-SQL) allow SELECT TOP N DISTINCT ... 3863 if limit and not matched_distinct and not all_: 3864 matched_distinct = self._match_set(self.DISTINCT_TOKENS) 3865 if matched_distinct: 3866 distinct = self.expression( 3867 exp.Distinct( 3868 on=self._parse_value(values=False) 3869 if self._match(TokenType.ON) 3870 else None 3871 ) 3872 ) 3873 else: 3874 all_ = self._match(TokenType.ALL) 3875 3876 if all_ and distinct: 3877 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3878 3879 projections, exclude = self._parse_projections() 3880 3881 this = self.expression( 3882 exp.Select( 3883 kind=kind, 3884 hint=hint, 3885 distinct=distinct, 3886 expressions=projections, 3887 limit=limit, 3888 exclude=exclude, 3889 operation_modifiers=operation_modifiers or None, 3890 ) 3891 ) 3892 this.comments = comments 3893 3894 into = self._parse_into() 3895 if into: 3896 this.set("into", into) 3897 3898 if not from_: 3899 from_ = self._parse_from() 3900 3901 if from_: 3902 this.set("from_", from_) 3903 3904 this = 
self._parse_query_modifiers(this) 3905 elif (table or nested) and self._match(TokenType.L_PAREN): 3906 comments = self._prev_comments 3907 this = self._parse_wrapped_select(table=table) 3908 3909 if this: 3910 this.add_comments(comments, prepend=True) 3911 3912 # We return early here so that the UNION isn't attached to the subquery by the 3913 # following call to _parse_set_operations, but instead becomes the parent node 3914 self._match_r_paren() 3915 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3916 elif self._match(TokenType.VALUES, advance=False): 3917 this = self._parse_derived_table_values() 3918 elif from_: 3919 this = exp.select("*").from_(from_.this, copy=False) 3920 this = self._parse_query_modifiers(this) 3921 elif self._match(TokenType.SUMMARIZE): 3922 table = self._match(TokenType.TABLE) 3923 this = self._parse_select() or self._parse_string() or self._parse_table() 3924 return self.expression(exp.Summarize(this=this, table=table)) 3925 elif self._match(TokenType.DESCRIBE): 3926 this = self._parse_describe() 3927 else: 3928 this = None 3929 3930 return self._parse_set_operations(this) if parse_set_operation else this 3931 3932 def _parse_recursive_with_search(self) -> exp.RecursiveWithSearch | None: 3933 self._match_text_seq("SEARCH") 3934 3935 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3936 3937 if not kind: 3938 return None 3939 3940 self._match_text_seq("FIRST", "BY") 3941 3942 return self.expression( 3943 exp.RecursiveWithSearch( 3944 kind=kind, 3945 this=self._parse_id_var(), 3946 expression=self._match_text_seq("SET") and self._parse_id_var(), 3947 using=self._match_text_seq("USING") and self._parse_id_var(), 3948 ) 3949 ) 3950 3951 def _parse_with(self, skip_with_token: bool = False) -> exp.With | None: 3952 if not skip_with_token and not self._match(TokenType.WITH): 3953 return None 3954 3955 comments = self._prev_comments 3956 recursive = self._match(TokenType.RECURSIVE) 3957 
def _parse_cte(self) -> exp.CTE | None:
    """Parse a single common table expression.

    Returns:
        The parsed `exp.CTE`, or None when the ALIAS token is missing and the parser
        does not allow omitting it (`OPTIONAL_ALIAS_TOKEN_CTE` is falsy). In the None
        case the parser is rewound to where this method started.
    """
    # Remember the starting position so we can rewind if this is not a CTE after all
    index = self._index

    alias = self._parse_table_alias(self.ID_VAR_TOKENS)
    if not alias or not alias.this:
        self.raise_error("Expected CTE to have alias")

    # Optional `USING KEY (<ids>)` between the alias and the ALIAS token
    key_expressions = (
        self._parse_wrapped_id_vars() if self._match_text_seq("USING", "KEY") else None
    )

    if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
        self._retreat(index)
        return None

    # Captured right after the ALIAS match attempt, before more tokens are consumed
    comments = self._prev_comments

    # Tri-state: False for NOT MATERIALIZED, True for MATERIALIZED, None if unspecified
    if self._match_text_seq("NOT", "MATERIALIZED"):
        materialized = False
    elif self._match_text_seq("MATERIALIZED"):
        materialized = True
    else:
        materialized = None

    cte = self.expression(
        exp.CTE(
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            key_expressions=key_expressions,
        ),
        comments=comments,
    )

    # A CTE whose body is a bare VALUES is rewritten to SELECT * FROM <values>;
    # an unaliased VALUES gets the table alias "_values" first.
    values = cte.this
    if isinstance(values, exp.Values):
        if values.alias:
            cte.set("this", exp.select("*").from_(values))
        else:
            cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

    return cte
identifiers and keywords (clauses) 4030 # so this section tries to parse the clause version and if it fails, it treats the token 4031 # as an identifier (alias) 4032 if self._can_parse_limit_or_offset(): 4033 return None 4034 4035 any_token = self._match(TokenType.ALIAS) 4036 alias = ( 4037 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4038 or self._parse_string_as_identifier() 4039 ) 4040 4041 index = self._index 4042 if self._match(TokenType.L_PAREN): 4043 columns = self._parse_csv(self._parse_function_parameter) 4044 self._match_r_paren() if columns else self._retreat(index) 4045 else: 4046 columns = None 4047 4048 if not alias and not columns: 4049 return None 4050 4051 table_alias = self.expression(exp.TableAlias(this=alias, columns=columns)) 4052 4053 # We bubble up comments from the Identifier to the TableAlias 4054 if isinstance(alias, exp.Identifier): 4055 table_alias.add_comments(alias.pop_comments()) 4056 4057 return table_alias 4058 4059 def _parse_subquery( 4060 self, this: exp.Expr | None, parse_alias: bool = True 4061 ) -> exp.Subquery | None: 4062 if not this: 4063 return None 4064 4065 return self.expression( 4066 exp.Subquery( 4067 this=this, 4068 pivots=self._parse_pivots(), 4069 alias=self._parse_table_alias() if parse_alias else None, 4070 sample=self._parse_table_sample(), 4071 ) 4072 ) 4073 4074 def _implicit_unnests_to_explicit(self, this: E) -> E: 4075 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 4076 4077 refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name} 4078 for i, join in enumerate(this.args.get("joins") or []): 4079 table = join.this 4080 normalized_table = table.copy() 4081 normalized_table.meta["maybe_column"] = True 4082 normalized_table = _norm(normalized_table, dialect=self.dialect) 4083 4084 if isinstance(table, exp.Table) and not join.args.get("on"): 4085 if normalized_table.parts[0].name in refs: 4086 table_as_column 
@t.overload
def _parse_query_modifiers(self, this: E) -> E: ...

@t.overload
def _parse_query_modifiers(self, this: None) -> None: ...

def _parse_query_modifiers(self, this):
    """Attach trailing joins, laterals and query modifier clauses to `this`.

    Modifiers are only parsed when `this` is an instance of one of the `MODIFIABLES`
    types; any other value (including None) skips straight to the implicit-unnest
    step and is then returned.
    """
    if isinstance(this, self.MODIFIABLES):
        for join in self._parse_joins():
            this.append("joins", join)
        # iter(callable, sentinel): keep calling _parse_lateral until it returns None
        for lateral in iter(self._parse_lateral, None):
            this.append("laterals", lateral)

        while True:
            # Peek only (advance=False); the selected parser consumes the tokens itself
            if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                modifier_token = self._curr
                parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                key, expression = parser(self)

                if expression:
                    # The same arg being set twice means the clause was repeated
                    if this.args.get(key):
                        self.raise_error(
                            f"Found multiple '{modifier_token.text.upper()}' clauses",
                            token=modifier_token,
                        )

                    this.set(key, expression)
                    if key == "limit":
                        # A limit node may carry an "offset" arg; it is detached here
                        # and re-attached to `this` as a separate exp.Offset node
                        offset = expression.args.get("offset")
                        expression.set("offset", None)

                        if offset:
                            offset = exp.Offset(expression=offset)
                            this.set("offset", offset)

                            # The limit's `expressions` are moved over to the offset node
                            limit_by_expressions = expression.expressions
                            expression.set("expressions", None)
                            offset.set("expressions", limit_by_expressions)
                    continue
            # Reached when no modifier token follows or the parser produced nothing
            break

    if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"):
        this = self._implicit_unnests_to_explicit(this)

    return this
start = self._curr 4149 while self._curr: 4150 self._advance() 4151 4152 end = self._tokens[self._index - 1] 4153 return exp.Hint(expressions=[self._find_sql(start, end)]) 4154 4155 def _parse_hint_function_call(self) -> exp.Expr | None: 4156 return self._parse_function_call() 4157 4158 def _parse_hint_body(self) -> exp.Hint | None: 4159 start_index = self._index 4160 should_fallback_to_string = False 4161 4162 hints = [] 4163 try: 4164 for hint in iter( 4165 lambda: self._parse_csv( 4166 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 4167 ), 4168 [], 4169 ): 4170 hints.extend(hint) 4171 except ParseError: 4172 should_fallback_to_string = True 4173 4174 if should_fallback_to_string or self._curr: 4175 self._retreat(start_index) 4176 return self._parse_hint_fallback_to_string() 4177 4178 return self.expression(exp.Hint(expressions=hints)) 4179 4180 def _parse_hint(self) -> exp.Hint | None: 4181 if self._match(TokenType.HINT) and self._prev_comments: 4182 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 4183 4184 return None 4185 4186 def _parse_into(self) -> exp.Into | None: 4187 if not self._match(TokenType.INTO): 4188 return None 4189 4190 temp = self._match(TokenType.TEMPORARY) 4191 unlogged = self._match_text_seq("UNLOGGED") 4192 self._match(TokenType.TABLE) 4193 4194 return self.expression( 4195 exp.Into(this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged) 4196 ) 4197 4198 def _parse_from( 4199 self, 4200 joins: bool = False, 4201 skip_from_token: bool = False, 4202 consume_pipe: bool = False, 4203 ) -> exp.From | None: 4204 if not skip_from_token and not self._match(TokenType.FROM): 4205 return None 4206 4207 comments = self._prev_comments 4208 return self.expression( 4209 exp.From(this=self._parse_table(joins=joins, consume_pipe=consume_pipe)), 4210 comments=comments, 4211 ) 4212 4213 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 4214 return 
def _parse_match_recognize(self) -> exp.MatchRecognize | None:
    """Parse a MATCH_RECOGNIZE clause into an `exp.MatchRecognize` node.

    Returns None when the MATCH_RECOGNIZE token is not matched. Otherwise the
    sub-clauses are attempted in a fixed order (partition, order, MEASURES, rows per
    match, AFTER MATCH SKIP, PATTERN, DEFINE), followed by the closing paren and a
    trailing table alias. Sub-clauses that are absent are stored as None.
    """
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()

    measures = (
        self._parse_csv(self._parse_match_recognize_measure)
        if self._match_text_seq("MEASURES")
        else None
    )

    # Rows-per-match option, stored as a var holding the normalized keyword text
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        text = "ALL ROWS PER MATCH"
        if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
            text += " SHOW EMPTY MATCHES"
        elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
            text += " OMIT EMPTY MATCHES"
        elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
            text += " WITH UNMATCHED ROWS"
        rows = exp.var(text)
    else:
        rows = None

    # Skip behavior, also stored as a var holding the keyword text
    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            text += " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            text += " TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            # The token after FIRST/LAST is taken verbatim, whatever its type
            text += f" TO FIRST {self._advance_any().text}"  # type: ignore
        elif self._match_text_seq("TO", "LAST"):
            text += f" TO LAST {self._advance_any().text}"  # type: ignore
        after = exp.var(text)
    else:
        after = None

    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # The pattern is not parsed structurally: tokens are skipped until the paren
        # opened above is balanced, and the SQL text between `start` and `end` is kept.
        paren = 1
        start = self._curr

        while self._curr and paren > 0:
            if self._curr.token_type == TokenType.L_PAREN:
                paren += 1
            if self._curr.token_type == TokenType.R_PAREN:
                paren -= 1

            # `end` is assigned before advancing, so it trails `_curr`; once the
            # balancing paren is consumed it is whatever `_prev` held at that point
            end = self._prev
            self._advance()

        # Tokens ran out before the parentheses were balanced
        if paren > 0:
            self.raise_error("Expecting )", self._curr)

        pattern = exp.var(self._find_sql(start, end))
    else:
        pattern = None

    define = (
        self._parse_csv(self._parse_name_as_expression)
        if self._match_text_seq("DEFINE")
        else None
    )

    self._match_r_paren()

    return self.expression(
        exp.MatchRecognize(
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    )
the lateral's child node to the lateral itself 4351 table_alias = this.args["alias"].pop() 4352 else: 4353 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4354 table_alias = self._parse_table_alias() 4355 4356 return self.expression( 4357 exp.Lateral( 4358 this=this, 4359 view=view, 4360 outer=outer, 4361 alias=table_alias, 4362 cross_apply=cross_apply, 4363 ordinality=ordinality, 4364 ) 4365 ) 4366 4367 def _parse_stream(self) -> exp.Stream | None: 4368 index = self._index 4369 if self._match(TokenType.STREAM): 4370 if this := self._try_parse(self._parse_table): 4371 return self.expression(exp.Stream(this=this)) 4372 self._retreat(index) 4373 return None 4374 4375 def _parse_join_parts( 4376 self, 4377 ) -> tuple[Token | None, Token | None, Token | None]: 4378 return ( 4379 self._prev if self._match_set(self.JOIN_METHODS) else None, 4380 self._prev if self._match_set(self.JOIN_SIDES) else None, 4381 self._prev if self._match_set(self.JOIN_KINDS) else None, 4382 ) 4383 4384 def _parse_using_identifiers(self) -> list[exp.Expr]: 4385 def _parse_column_as_identifier() -> exp.Expr | None: 4386 this = self._parse_column() 4387 if isinstance(this, exp.Column): 4388 return this.this 4389 return this 4390 4391 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 4392 4393 def _parse_join( 4394 self, skip_join_token: bool = False, parse_bracket: bool = False 4395 ) -> exp.Join | None: 4396 if self._match(TokenType.COMMA): 4397 table = self._try_parse(self._parse_table) 4398 cross_join = self.expression(exp.Join(this=table)) if table else None 4399 4400 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 4401 cross_join.set("kind", "CROSS") 4402 4403 return cross_join 4404 4405 index = self._index 4406 method, side, kind = self._parse_join_parts() 4407 directed = self._match_text_seq("DIRECTED") 4408 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 4409 join = self._match(TokenType.JOIN) or (kind and 
kind.token_type == TokenType.STRAIGHT_JOIN) 4410 join_comments = self._prev_comments 4411 4412 if not skip_join_token and not join: 4413 self._retreat(index) 4414 kind = None 4415 method = None 4416 side = None 4417 4418 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 4419 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 4420 4421 if not skip_join_token and not join and not outer_apply and not cross_apply: 4422 return None 4423 4424 kwargs: dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 4425 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 4426 kwargs["expressions"] = self._parse_csv( 4427 lambda: self._parse_table(parse_bracket=parse_bracket) 4428 ) 4429 4430 if method: 4431 kwargs["method"] = method.text.upper() 4432 if side: 4433 kwargs["side"] = side.text.upper() 4434 if kind: 4435 kwargs["kind"] = kind.text.upper() 4436 if hint: 4437 kwargs["hint"] = hint 4438 4439 if self._match(TokenType.MATCH_CONDITION): 4440 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 4441 4442 if self._match(TokenType.ON): 4443 kwargs["on"] = self._parse_disjunction() 4444 elif self._match(TokenType.USING): 4445 kwargs["using"] = self._parse_using_identifiers() 4446 elif ( 4447 not method 4448 and not (outer_apply or cross_apply) 4449 and not isinstance(kwargs["this"], exp.Unnest) 4450 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 4451 ): 4452 index = self._index 4453 joins: list | None = list(self._parse_joins()) 4454 4455 if joins and self._match(TokenType.ON): 4456 kwargs["on"] = self._parse_disjunction() 4457 elif joins and self._match(TokenType.USING): 4458 kwargs["using"] = self._parse_using_identifiers() 4459 else: 4460 joins = None 4461 self._retreat(index) 4462 4463 kwargs["this"].set("joins", joins if joins else None) 4464 4465 kwargs["pivots"] = self._parse_pivots() 4466 4467 comments = [c for token in (method, side, 
kind) if token for c in token.comments] 4468 comments = (join_comments or []) + comments 4469 4470 if ( 4471 self.ADD_JOIN_ON_TRUE 4472 and not kwargs.get("on") 4473 and not kwargs.get("using") 4474 and not kwargs.get("method") 4475 and kwargs.get("kind") in (None, "INNER", "OUTER") 4476 ): 4477 kwargs["on"] = exp.true() 4478 4479 if directed: 4480 kwargs["directed"] = directed 4481 4482 return self.expression(exp.Join(**kwargs), comments=comments) 4483 4484 def _parse_opclass(self) -> exp.Expr | None: 4485 this = self._parse_disjunction() 4486 4487 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 4488 return this 4489 4490 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 4491 return self.expression(exp.Opclass(this=this, expression=self._parse_table_parts())) 4492 4493 return this 4494 4495 def _parse_index_params(self) -> exp.IndexParameters: 4496 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 4497 4498 if self._match(TokenType.L_PAREN, advance=False): 4499 columns = self._parse_wrapped_csv(self._parse_with_operator) 4500 else: 4501 columns = None 4502 4503 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 4504 partition_by = self._parse_partition_by() 4505 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 4506 tablespace = ( 4507 self._parse_var(any_token=True) 4508 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 4509 else None 4510 ) 4511 where = self._parse_where() 4512 4513 on = self._parse_field() if self._match(TokenType.ON) else None 4514 4515 return self.expression( 4516 exp.IndexParameters( 4517 using=using, 4518 columns=columns, 4519 include=include, 4520 partition_by=partition_by, 4521 where=where, 4522 with_storage=with_storage, 4523 tablespace=tablespace, 4524 on=on, 4525 ) 4526 ) 4527 4528 def _parse_index( 4529 self, index: exp.Expr | None = None, anonymous: bool = False 4530 ) -> exp.Index | None: 4531 if 
index or anonymous: 4532 unique = None 4533 primary = None 4534 amp = None 4535 4536 self._match(TokenType.ON) 4537 self._match(TokenType.TABLE) # hive 4538 table = self._parse_table_parts(schema=True) 4539 else: 4540 unique = self._match(TokenType.UNIQUE) 4541 primary = self._match_text_seq("PRIMARY") 4542 amp = self._match_text_seq("AMP") 4543 4544 if not self._match(TokenType.INDEX): 4545 return None 4546 4547 index = self._parse_id_var() 4548 table = None 4549 4550 params = self._parse_index_params() 4551 4552 return self.expression( 4553 exp.Index( 4554 this=index, table=table, unique=unique, primary=primary, amp=amp, params=params 4555 ) 4556 ) 4557 4558 def _parse_table_hints(self) -> list[exp.Expr] | None: 4559 hints: list[exp.Expr] = [] 4560 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 4561 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 4562 hints.append( 4563 self.expression( 4564 exp.WithTableHint( 4565 expressions=self._parse_csv( 4566 lambda: self._parse_function() or self._parse_var(any_token=True) 4567 ) 4568 ) 4569 ) 4570 ) 4571 self._match_r_paren() 4572 else: 4573 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 4574 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 4575 hint = exp.IndexTableHint(this=self._prev.text.upper()) 4576 4577 self._match_set((TokenType.INDEX, TokenType.KEY)) 4578 if self._match(TokenType.FOR): 4579 hint.set("target", self._advance_any() and self._prev.text.upper()) 4580 4581 hint.set("expressions", self._parse_wrapped_id_vars()) 4582 hints.append(hint) 4583 4584 return hints or None 4585 4586 def _parse_table_part(self, schema: bool = False) -> exp.Expr | None: 4587 return ( 4588 (not schema and self._parse_function(optional_parens=False)) 4589 or self._parse_id_var(any_token=False) 4590 or self._parse_string_as_identifier() 4591 or self._parse_placeholder() 4592 ) 4593 4594 def _parse_table_parts_fast(self) -> exp.Table | None: 4595 index = 
def _parse_table_parts(
    self,
    schema: bool = False,
    is_db_reference: bool = False,
    wildcard: bool = False,
    fast: bool = False,
) -> exp.Table | exp.Dot | None:
    """Parse a dot-separated table reference into an `exp.Table`.

    Args:
        schema: Forwarded to `_parse_table_part` for every component.
        is_db_reference: When truthy, the parsed parts are shifted one level up so
            that the last component becomes the db and the table is left as None.
        wildcard: When truthy, a STAR token directly connected to the last component
            is folded into the table name.
        fast: When truthy, delegate entirely to `_parse_table_parts_fast`.

    Returns:
        The table expression with any changes, historical data and pivots that
        follow it attached.
    """
    if fast:
        return self._parse_table_parts_fast()

    catalog: exp.Expr | str | None = None
    db: exp.Expr | str | None = None
    table: exp.Expr | str | None = self._parse_table_part(schema=schema)

    # Each DOT shifts the parts one slot to the left: table -> db -> catalog
    while self._match(TokenType.DOT):
        if catalog:
            # This allows nesting the table in arbitrarily many dot expressions if needed
            table = self.expression(
                exp.Dot(this=table, expression=self._parse_table_part(schema=schema))
            )
        else:
            catalog = db
            db = table
            # "" used for tsql FROM a..b case
            table = self._parse_table_part(schema=schema) or ""

    # The STAR is only consumed if all the preceding conditions hold (short-circuit)
    if (
        wildcard
        and self._is_connected()
        and (isinstance(table, exp.Identifier) or not table)
        and self._match(TokenType.STAR)
    ):
        if isinstance(table, exp.Identifier):
            # Append the wildcard to the existing identifier's name in place
            table.args["this"] += "*"
        else:
            table = exp.Identifier(this="*")

    if is_db_reference:
        catalog = db
        db = table
        table = None

    if not table and not is_db_reference:
        self.raise_error(f"Expected table name but got {self._curr}")
    if not db and is_db_reference:
        self.raise_error(f"Expected database name but got {self._curr}")

    table = self.expression(exp.Table(this=table, db=db, catalog=catalog))

    # Bubble up comments from identifier parts to the Table
    comments = []
    for part in table.parts:
        if part_comments := part.pop_comments():
            comments.extend(part_comments)
    if comments:
        table.add_comments(comments)

    # Optional trailing clauses; each arg is only set when its parser returned something
    changes = self._parse_changes()
    if changes:
        table.set("changes", changes)

    at_before = self._parse_historical_data()
    if at_before:
        table.set("when", at_before)

    pivots = self._parse_pivots()
    if pivots:
        table.set("pivots", pivots)

    return table
None: 4743 curr_tt = self._curr.token_type 4744 next_tt = self._next.token_type 4745 4746 fast_terminators = self.TABLE_TERMINATORS 4747 4748 # only return the table if we're sure there are no other operators 4749 # MATCH_CONDITION is a special case because it accepts any alias before it like LIMIT 4750 if curr_tt in fast_terminators and next_tt != TokenType.MATCH_CONDITION: 4751 return table 4752 4753 postfix_tokens = self.TABLE_POSTFIX_TOKENS 4754 4755 if curr_tt not in postfix_tokens and next_tt not in postfix_tokens: 4756 if alias := self._parse_table_alias( 4757 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 4758 ): 4759 table.set("alias", alias) 4760 4761 if self._curr.token_type in fast_terminators: 4762 return table 4763 4764 self._retreat(index) 4765 4766 if stream := self._parse_stream(): 4767 return stream 4768 4769 if lateral := self._parse_lateral(): 4770 return lateral 4771 4772 if unnest := self._parse_unnest(): 4773 return unnest 4774 4775 if values := self._parse_derived_table_values(): 4776 return values 4777 4778 if subquery := self._parse_select(table=True, consume_pipe=consume_pipe): 4779 if not subquery.args.get("pivots"): 4780 subquery.set("pivots", self._parse_pivots()) 4781 if joins: 4782 for join in self._parse_joins(): 4783 subquery.append("joins", join) 4784 return subquery 4785 4786 bracket = parse_bracket and self._parse_bracket(None) 4787 bracket = self.expression(exp.Table(this=bracket)) if bracket else None 4788 4789 rows_from_tables = ( 4790 self._parse_wrapped_csv(self._parse_table) 4791 if self._match_text_seq("ROWS", "FROM") 4792 else None 4793 ) 4794 rows_from = ( 4795 self.expression(exp.Table(rows_from=rows_from_tables)) if rows_from_tables else None 4796 ) 4797 4798 only = self._match(TokenType.ONLY) 4799 4800 this = t.cast( 4801 exp.Expr, 4802 bracket 4803 or rows_from 4804 or self._parse_bracket( 4805 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4806 ), 4807 ) 4808 4809 if only: 4810 
this.set("only", only) 4811 4812 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4813 self._match(TokenType.STAR) 4814 4815 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4816 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4817 this.set("partition", self._parse_partition()) 4818 4819 if schema: 4820 return self._parse_schema(this=this) 4821 4822 if self.dialect.ALIAS_POST_VERSION: 4823 this.set("version", self._parse_version()) 4824 4825 if self.dialect.ALIAS_POST_TABLESAMPLE: 4826 this.set("sample", self._parse_table_sample()) 4827 4828 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4829 if alias: 4830 this.set("alias", alias) 4831 4832 if self._match(TokenType.INDEXED_BY): 4833 this.set("indexed", self._parse_table_parts()) 4834 elif self._match_text_seq("NOT", "INDEXED"): 4835 this.set("indexed", False) 4836 4837 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4838 return self.expression( 4839 exp.AtIndex(this=this.to_column(copy=False), expression=self._parse_id_var()) 4840 ) 4841 4842 this.set("hints", self._parse_table_hints()) 4843 4844 if not this.args.get("pivots"): 4845 this.set("pivots", self._parse_pivots()) 4846 4847 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4848 this.set("sample", self._parse_table_sample()) 4849 4850 if not self.dialect.ALIAS_POST_VERSION: 4851 this.set("version", self._parse_version()) 4852 4853 if joins: 4854 for join in self._parse_joins(): 4855 this.append("joins", join) 4856 4857 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4858 this.set("ordinality", True) 4859 this.set("alias", self._parse_table_alias()) 4860 4861 return this 4862 4863 def _parse_version(self) -> exp.Version | None: 4864 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4865 this = "TIMESTAMP" 4866 elif self._match(TokenType.VERSION_SNAPSHOT): 4867 this = "VERSION" 4868 else: 4869 return None 4870 4871 if 
self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4872 kind = self._prev.text.upper() 4873 start = self._parse_bitwise() 4874 self._match_texts(("TO", "AND")) 4875 end = self._parse_bitwise() 4876 expression: exp.Expr | None = self.expression(exp.Tuple(expressions=[start, end])) 4877 elif self._match_text_seq("CONTAINED", "IN"): 4878 kind = "CONTAINED IN" 4879 expression = self.expression( 4880 exp.Tuple(expressions=self._parse_wrapped_csv(self._parse_bitwise)) 4881 ) 4882 elif self._match(TokenType.ALL): 4883 kind = "ALL" 4884 expression = None 4885 else: 4886 self._match_text_seq("AS", "OF") 4887 kind = "AS OF" 4888 expression = self._parse_type() 4889 4890 return self.expression(exp.Version(this=this, expression=expression, kind=kind)) 4891 4892 def _parse_historical_data(self) -> exp.HistoricalData | None: 4893 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4894 index = self._index 4895 historical_data = None 4896 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4897 this = self._prev.text.upper() 4898 kind = ( 4899 self._match(TokenType.L_PAREN) 4900 and self._match_texts(self.HISTORICAL_DATA_KIND) 4901 and self._prev.text.upper() 4902 ) 4903 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4904 4905 if expression: 4906 self._match_r_paren() 4907 historical_data = self.expression( 4908 exp.HistoricalData(this=this, kind=kind, expression=expression) 4909 ) 4910 else: 4911 self._retreat(index) 4912 4913 return historical_data 4914 4915 def _parse_changes(self) -> exp.Changes | None: 4916 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4917 return None 4918 4919 information = self._parse_var(any_token=True) 4920 self._match_r_paren() 4921 4922 return self.expression( 4923 exp.Changes( 4924 information=information, 4925 at_before=self._parse_historical_data(), 4926 end=self._parse_historical_data(), 4927 ) 4928 ) 4929 4930 def _parse_unnest(self, with_alias: bool = True) -> exp.Unnest | None: 4931 if 
def _parse_table_sample(self, as_modifier: bool = False) -> exp.TableSample | None:
    """Parse a table sampling clause into an `exp.TableSample` node.

    Args:
        as_modifier: When true, the `USING SAMPLE` spelling is accepted in
            addition to the `TABLE_SAMPLE` token.

    Returns:
        The parsed node, or `None` when neither introducer is present
        (nothing is consumed in that case beyond what the failed matches
        themselves rewind).
    """
    if not self._match(TokenType.TABLE_SAMPLE) and not (
        as_modifier and self._match_text_seq("USING", "SAMPLE")
    ):
        return None

    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    size = None
    seed = None

    # Optional sampling method written before the (optional) parenthesis.
    method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
    matched_l_paren = self._match(TokenType.L_PAREN)

    if self.TABLESAMPLE_CSV:
        # The dialect takes a comma-separated argument list instead of a single amount.
        num = None
        expressions = self._parse_csv(self._parse_primary)
    else:
        expressions = None
        num = (
            self._parse_factor()
            if self._match(TokenType.NUMBER, advance=False)
            else self._parse_primary() or self._parse_placeholder()
        )

    if self._match_text_seq("BUCKET"):
        # BUCKET <numerator> OUT OF <denominator> [ON <field>]
        bucket_numerator = self._parse_number()
        self._match_text_seq("OUT", "OF")
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
        percent = num
    elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
        size = num
    else:
        # No unit given and the dialect treats a bare amount as a percentage.
        percent = num

    if matched_l_paren:
        self._match_r_paren()

    if self._match(TokenType.L_PAREN):
        # Trailing `( <method> [, <seed>] )` group; it overrides any leading method.
        method = self._parse_var(upper=True)
        seed = self._match(TokenType.COMMA) and self._parse_number()
        self._match_r_paren()
    elif self._match_texts(("SEED", "REPEATABLE")):
        seed = self._parse_wrapped(self._parse_number)

    if not method and self.DEFAULT_SAMPLING_METHOD:
        method = exp.var(self.DEFAULT_SAMPLING_METHOD)

    return self.expression(
        exp.TableSample(
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )
    )
# https://duckdb.org/docs/sql/statements/pivot
def _parse_simplified_pivot(self, is_unpivot: bool | None = None) -> exp.Pivot:
    """Parse the statement-level PIVOT / UNPIVOT form.

    The pieces are consumed strictly in this order: source table, optional
    `ON` list, optional `INTO` clause, optional `USING` list, optional group.

    Args:
        is_unpivot: Stored verbatim as the node's `unpivot` argument.
    """

    def _parse_on() -> exp.Expr | None:
        operand = self._parse_bitwise()

        if self._match(TokenType.IN):
            # PIVOT ... ON col IN (row_val1, row_val2)
            return self._parse_in(operand)

        if not self._match(TokenType.ALIAS, advance=False):
            return operand

        # UNPIVOT ... ON (col1, col2, col3) AS row_val
        return self._parse_alias(operand)

    source = self._parse_table()

    # Each optional list keeps the falsy match result when its keyword is absent.
    on_items = self._match(TokenType.ON)
    if on_items:
        on_items = self._parse_csv(_parse_on)

    into = self._parse_unpivot_columns()

    using_items = self._match(TokenType.USING)
    if using_items:
        using_items = self._parse_csv(lambda: self._parse_alias(self._parse_column()))

    group = self._parse_group()

    node = exp.Pivot(
        this=source,
        expressions=on_items,
        using=using_items,
        group=group,
        unpivot=is_unpivot,
        into=into,
    )
    return self.expression(node)
5117 exprs: list[exp.Expr] = ensure_list(exp.PivotAny(this=self._parse_order())) 5118 else: 5119 exprs = self._parse_csv(_parse_aliased_expression) 5120 self._match_r_paren() 5121 return self.expression(exp.In(this=value, expressions=exprs)) 5122 5123 return self.expression(exp.In(this=value, field=self._parse_id_var())) 5124 5125 def _parse_pivot_aggregation(self) -> exp.Expr | None: 5126 func = self._parse_function() 5127 if not func: 5128 if self._prev.token_type == TokenType.COMMA: 5129 return None 5130 self.raise_error("Expecting an aggregation function in PIVOT") 5131 5132 return self._parse_alias(func) 5133 5134 def _parse_pivot(self) -> exp.Pivot | None: 5135 index = self._index 5136 include_nulls = None 5137 5138 if self._match(TokenType.PIVOT): 5139 unpivot = False 5140 elif self._match(TokenType.UNPIVOT): 5141 unpivot = True 5142 5143 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 5144 if self._match_text_seq("INCLUDE", "NULLS"): 5145 include_nulls = True 5146 elif self._match_text_seq("EXCLUDE", "NULLS"): 5147 include_nulls = False 5148 else: 5149 return None 5150 5151 expressions = [] 5152 5153 if not self._match(TokenType.L_PAREN): 5154 self._retreat(index) 5155 return None 5156 5157 if unpivot: 5158 expressions = self._parse_csv(self._parse_column) 5159 else: 5160 expressions = self._parse_csv(self._parse_pivot_aggregation) 5161 5162 if not expressions: 5163 self.raise_error("Failed to parse PIVOT's aggregation list") 5164 5165 if not self._match(TokenType.FOR): 5166 self.raise_error("Expecting FOR") 5167 5168 fields = [] 5169 while True: 5170 field = self._try_parse(self._parse_pivot_in) 5171 if not field: 5172 break 5173 fields.append(field) 5174 5175 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 5176 self._parse_bitwise 5177 ) 5178 5179 group = self._parse_group() 5180 5181 self._match_r_paren() 5182 5183 pivot = self.expression( 5184 exp.Pivot( 5185 
expressions=expressions, 5186 fields=fields, 5187 unpivot=unpivot, 5188 include_nulls=include_nulls, 5189 default_on_null=default_on_null, 5190 group=group, 5191 ) 5192 ) 5193 5194 if unpivot: 5195 pivot.set("expressions", [_unpivot_target(e) for e in pivot.expressions]) 5196 for pivot_field in pivot.fields: 5197 if isinstance(pivot_field, exp.In): 5198 pivot_field.set("this", _unpivot_target(pivot_field.this)) 5199 5200 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 5201 pivot.set("alias", self._parse_table_alias()) 5202 5203 if not unpivot: 5204 names = self._pivot_column_names(t.cast(list[exp.Expr], expressions)) 5205 5206 columns: list[exp.Expr] = [] 5207 all_fields = [] 5208 for pivot_field in pivot.fields: 5209 pivot_field_expressions = pivot_field.expressions 5210 5211 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 5212 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 5213 continue 5214 5215 all_fields.append( 5216 [ 5217 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 5218 for fld in pivot_field_expressions 5219 ] 5220 ) 5221 5222 if all_fields: 5223 if names: 5224 all_fields.append(names) 5225 5226 # Generate all possible combinations of the pivot columns 5227 # e.g PIVOT(sum(...) 
as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 5228 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 5229 for fld_parts_tuple in itertools.product(*all_fields): 5230 fld_parts = list(fld_parts_tuple) 5231 5232 if names and self.PREFIXED_PIVOT_COLUMNS: 5233 # Move the "name" to the front of the list 5234 fld_parts.insert(0, fld_parts.pop(-1)) 5235 5236 columns.append(exp.to_identifier("_".join(fld_parts))) 5237 5238 pivot.set("columns", columns) 5239 5240 return pivot 5241 5242 def _pivot_column_names(self, aggregations: list[exp.Expr]) -> list[str]: 5243 return [agg.alias for agg in aggregations if agg.alias] 5244 5245 def _parse_prewhere(self, skip_where_token: bool = False) -> exp.PreWhere | None: 5246 if not skip_where_token and not self._match(TokenType.PREWHERE): 5247 return None 5248 5249 comments = self._prev_comments 5250 return self.expression( 5251 exp.PreWhere(this=self._parse_disjunction()), 5252 comments=comments, 5253 ) 5254 5255 def _parse_where(self, skip_where_token: bool = False) -> exp.Where | None: 5256 if not skip_where_token and not self._match(TokenType.WHERE): 5257 return None 5258 5259 comments = self._prev_comments 5260 return self.expression( 5261 exp.Where(this=self._parse_disjunction()), 5262 comments=comments, 5263 ) 5264 5265 def _parse_group(self, skip_group_by_token: bool = False) -> exp.Group | None: 5266 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 5267 return None 5268 comments = self._prev_comments 5269 5270 elements: dict[str, t.Any] = defaultdict(list) 5271 5272 if self._match(TokenType.ALL): 5273 elements["all"] = True 5274 elif self._match(TokenType.DISTINCT): 5275 elements["all"] = False 5276 5277 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 5278 return self.expression(exp.Group(**elements), comments=comments) # type: ignore 5279 5280 while True: 5281 index = self._index 5282 5283 elements["expressions"].extend( 5284 self._parse_csv( 5285 
def _parse_cube_or_rollup(self, with_prefix: bool = False) -> exp.Cube | exp.Rollup | None:
    """Parse a CUBE or ROLLUP grouping element.

    Args:
        with_prefix: When true, no parenthesized expression list is parsed
            and the node is built with an empty `expressions` list.

    Returns:
        An `exp.Cube` or `exp.Rollup` node, or `None` if the current token
        is neither CUBE nor ROLLUP.
    """
    candidates = ((TokenType.CUBE, exp.Cube), (TokenType.ROLLUP, exp.Rollup))

    for token_type, node_type in candidates:
        if self._match(token_type):
            break
    else:
        return None

    members = [] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise)
    return self.expression(node_type(expressions=members))
exp.Having(this=self._parse_disjunction()), 5341 comments=comments, 5342 ) 5343 5344 def _parse_qualify(self) -> exp.Qualify | None: 5345 if not self._match(TokenType.QUALIFY): 5346 return None 5347 return self.expression(exp.Qualify(this=self._parse_disjunction())) 5348 5349 def _parse_connect_with_prior(self) -> exp.Expr | None: 5350 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 5351 exp.Prior(this=self._parse_bitwise()) 5352 ) 5353 connect = self._parse_disjunction() 5354 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 5355 return connect 5356 5357 def _parse_connect(self, skip_start_token: bool = False) -> exp.Connect | None: 5358 if skip_start_token: 5359 start = None 5360 elif self._match(TokenType.START_WITH): 5361 start = self._parse_disjunction() 5362 else: 5363 return None 5364 5365 self._match(TokenType.CONNECT_BY) 5366 nocycle = self._match_text_seq("NOCYCLE") 5367 connect = self._parse_connect_with_prior() 5368 5369 if not start and self._match(TokenType.START_WITH): 5370 start = self._parse_disjunction() 5371 5372 return self.expression(exp.Connect(start=start, connect=connect, nocycle=nocycle)) 5373 5374 def _parse_name_as_expression(self) -> exp.Expr | None: 5375 this = self._parse_id_var(any_token=True) 5376 if self._match(TokenType.ALIAS): 5377 this = self.expression(exp.Alias(alias=this, this=self._parse_disjunction())) 5378 return this 5379 5380 def _parse_interpolate(self) -> list[exp.Expr] | None: 5381 if self._match_text_seq("INTERPOLATE"): 5382 return self._parse_wrapped_csv(self._parse_name_as_expression) 5383 return None 5384 5385 def _parse_order( 5386 self, this: exp.Expr | None = None, skip_order_token: bool = False 5387 ) -> exp.Expr | None: 5388 siblings = None 5389 if not skip_order_token and not self._match(TokenType.ORDER_BY): 5390 if not self._match(TokenType.ORDER_SIBLINGS_BY): 5391 return this 5392 5393 siblings = True 5394 5395 comments = self._prev_comments 5396 return self.expression( 5397 exp.Order( 
def _parse_ordered(
    self, parse_method: t.Callable[[], exp.Expr | None] | None = None
) -> exp.Ordered | None:
    """Parse one ORDER BY item, including direction, null placement and WITH FILL.

    Args:
        parse_method: Optional callable used to parse the ordered expression;
            `_parse_disjunction` is used when it is not supplied.

    Returns:
        An `exp.Ordered` node, or `None` when no expression could be parsed.
    """
    this = parse_method() if parse_method else self._parse_disjunction()
    if not this:
        return None

    # `ORDER BY ALL` is represented as a variable in dialects that support it.
    if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
        this = exp.var("ALL")

    # desc is True for DESC, False for an explicit ASC, and None when neither was written.
    asc = self._match(TokenType.ASC)
    desc: bool | None = True if self._match(TokenType.DESC) else (False if asc else None)

    is_nulls_first = self._match_text_seq("NULLS", "FIRST")
    is_nulls_last = self._match_text_seq("NULLS", "LAST")

    nulls_first = is_nulls_first or False
    explicitly_null_ordered = is_nulls_first or is_nulls_last

    # Without an explicit NULLS FIRST/LAST, infer the placement from the dialect:
    # nulls come first for a non-descending sort when nulls are "small", or for a
    # descending sort when they are not "small" -- unless the dialect always puts
    # nulls last.
    if (
        not explicitly_null_ordered
        and (
            (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
            or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
        )
        and self.dialect.NULL_ORDERING != "nulls_are_last"
    ):
        nulls_first = True

    # Optional `WITH FILL [FROM <expr>] [TO <expr>] [STEP <expr>] [INTERPOLATE (...)]`.
    if self._match_text_seq("WITH", "FILL"):
        with_fill = self.expression(
            exp.WithFill(
                from_=self._match(TokenType.FROM) and self._parse_bitwise(),
                to=self._match_text_seq("TO") and self._parse_bitwise(),
                step=self._match_text_seq("STEP") and self._parse_bitwise(),
                interpolate=self._parse_interpolate(),
            )
        )
    else:
        with_fill = None

    return self.expression(
        exp.Ordered(this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill)
    )
TokenType.MOD)) 5457 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 5458 self._match_text_seq("ONLY") 5459 with_ties = self._match_text_seq("WITH", "TIES") 5460 5461 if not (percent or rows or with_ties): 5462 return None 5463 5464 return self.expression(exp.LimitOptions(percent=percent, rows=rows, with_ties=with_ties)) 5465 5466 def _parse_limit( 5467 self, 5468 this: exp.Expr | None = None, 5469 top: bool = False, 5470 skip_limit_token: bool = False, 5471 ) -> exp.Expr | None: 5472 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 5473 comments = self._prev_comments 5474 if top: 5475 limit_paren = self._match(TokenType.L_PAREN) 5476 expression = ( 5477 self._parse_term() or self._parse_select() 5478 if limit_paren 5479 else self._parse_number() 5480 ) 5481 5482 if limit_paren: 5483 self._match_r_paren() 5484 5485 else: 5486 # Parsing LIMIT x% (i.e x PERCENT) as a term leads to an error, since 5487 # we try to build an exp.Mod expr. For that matter, we backtrack and instead 5488 # consume the factor plus parse the percentage separately 5489 index = self._index 5490 expression = self._try_parse(self._parse_term) 5491 if isinstance(expression, exp.Mod): 5492 self._retreat(index) 5493 expression = self._parse_factor() 5494 elif not expression: 5495 expression = self._parse_factor() 5496 limit_options = self._parse_limit_options() 5497 5498 if self._match(TokenType.COMMA): 5499 offset = expression 5500 expression = self._parse_term() 5501 else: 5502 offset = None 5503 5504 limit_exp = self.expression( 5505 exp.Limit( 5506 this=this, 5507 expression=expression, 5508 offset=offset, 5509 limit_options=limit_options, 5510 expressions=self._parse_limit_by(), 5511 ), 5512 comments=comments, 5513 ) 5514 5515 return limit_exp 5516 5517 if self._match(TokenType.FETCH): 5518 direction = ( 5519 self._prev.text.upper() 5520 if self._match_set((TokenType.FIRST, TokenType.NEXT)) 5521 else "FIRST" 5522 ) 5523 5524 count = 
def _can_parse_named_window(self) -> bool:
    """Report whether the upcoming tokens look like `WINDOW <id> AS (`.

    `WINDOW` is in ID_VAR_TOKENS so it could be mistakenly consumed as an
    implicit alias. This is a pure lookahead: no token is consumed.
    """
    if not self._match(TokenType.WINDOW, advance=False):
        return False

    # The three tokens following WINDOW; a short slice means the input ends too early.
    lookahead = self._tokens[self._index + 1 : self._index + 4]
    if len(lookahead) < 3:
        return False

    name, alias_tok, body = lookahead
    return (
        name.token_type in self.ID_VAR_TOKENS
        and alias_tok.token_type == TokenType.ALIAS
        and body.token_type == TokenType.L_PAREN
    )
= self._parse_join_parts() 5624 5625 side = side_token.text if side_token else None 5626 kind = kind_token.text if kind_token else None 5627 5628 if not self._match_set(self.SET_OPERATIONS): 5629 self._retreat(start) 5630 return None 5631 5632 token_type = self._prev.token_type 5633 5634 if token_type == TokenType.UNION: 5635 operation: type[exp.SetOperation] = exp.Union 5636 elif token_type == TokenType.EXCEPT: 5637 operation = exp.Except 5638 else: 5639 operation = exp.Intersect 5640 5641 comments = self._prev.comments 5642 5643 if self._match(TokenType.DISTINCT): 5644 distinct: bool | None = True 5645 elif self._match(TokenType.ALL): 5646 distinct = False 5647 else: 5648 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 5649 if distinct is None: 5650 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 5651 5652 by_name = ( 5653 self._match_text_seq("BY", "NAME") 5654 or self._match_text_seq("STRICT", "CORRESPONDING") 5655 or None 5656 ) 5657 if self._match_text_seq("CORRESPONDING"): 5658 by_name = True 5659 if not side and not kind: 5660 kind = "INNER" 5661 5662 on_column_list = None 5663 if by_name and self._match_texts(("ON", "BY")): 5664 on_column_list = self._parse_wrapped_csv(self._parse_column) 5665 5666 expression = self._parse_select( 5667 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 5668 ) 5669 5670 return self.expression( 5671 operation( 5672 this=this, 5673 distinct=distinct, 5674 by_name=by_name, 5675 expression=expression, 5676 side=side, 5677 kind=kind, 5678 on=on_column_list, 5679 ), 5680 comments=comments, 5681 ) 5682 5683 def _parse_set_operations(self, this: exp.Expr | None) -> exp.Expr | None: 5684 while this: 5685 setop = self.parse_set_operation(this) 5686 if not setop: 5687 break 5688 this = setop 5689 5690 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 5691 expression = this.expression 5692 5693 if expression: 5694 for arg in self.SET_OP_MODIFIERS: 5695 expr 
= expression.args.get(arg) 5696 if expr: 5697 this.set(arg, expr.pop()) 5698 5699 return this 5700 5701 def _parse_expression(self) -> exp.Expr | None: 5702 return self._parse_alias(self._parse_assignment()) 5703 5704 def _parse_assignment(self) -> exp.Expr | None: 5705 this = self._parse_disjunction() 5706 if not this and self._next.token_type in self.ASSIGNMENT: 5707 # This allows us to parse <non-identifier token> := <expr> 5708 this = exp.column( 5709 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 5710 ) 5711 5712 while self._match_set(self.ASSIGNMENT): 5713 if isinstance(this, exp.Column) and len(this.parts) == 1: 5714 this = this.this 5715 5716 comments = self._prev_comments 5717 this = self.expression( 5718 self.ASSIGNMENT[self._prev.token_type]( 5719 this=this, expression=self._parse_assignment() 5720 ), 5721 comments=comments, 5722 ) 5723 5724 return this 5725 5726 def _parse_disjunction(self) -> exp.Expr | None: 5727 this = self._parse_conjunction() 5728 while self._match_set(self.DISJUNCTION): 5729 comments = self._prev_comments 5730 this = self.expression( 5731 self.DISJUNCTION[self._prev.token_type]( 5732 this=this, expression=self._parse_conjunction() 5733 ), 5734 comments=comments, 5735 ) 5736 return this 5737 5738 def _parse_conjunction(self) -> exp.Expr | None: 5739 this = self._parse_equality() 5740 while self._match_set(self.CONJUNCTION): 5741 comments = self._prev_comments 5742 this = self.expression( 5743 self.CONJUNCTION[self._prev.token_type]( 5744 this=this, expression=self._parse_equality() 5745 ), 5746 comments=comments, 5747 ) 5748 return this 5749 5750 def _parse_equality(self) -> exp.Expr | None: 5751 this = self._parse_comparison() 5752 while self._match_set(self.EQUALITY): 5753 comments = self._prev_comments 5754 this = self.expression( 5755 self.EQUALITY[self._prev.token_type]( 5756 this=this, expression=self._parse_comparison() 5757 ), 5758 comments=comments, 5759 ) 5760 return this 5761 5762 def 
_parse_comparison(self) -> exp.Expr | None: 5763 this = self._parse_range() 5764 while self._match_set(self.COMPARISON): 5765 comments = self._prev_comments 5766 this = self.expression( 5767 self.COMPARISON[self._prev.token_type](this=this, expression=self._parse_range()), 5768 comments=comments, 5769 ) 5770 return this 5771 5772 def _parse_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5773 this = this or self._parse_bitwise() 5774 negate = self._match(TokenType.NOT) 5775 5776 if self._match_set(self.RANGE_PARSERS): 5777 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 5778 if not expression: 5779 return this 5780 5781 this = expression 5782 elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)): 5783 this = self.expression(exp.Is(this=this, expression=exp.Null())) 5784 5785 # Postgres supports ISNULL and NOTNULL for conditions. 5786 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5787 if self._match(TokenType.NOTNULL): 5788 this = self.expression(exp.Is(this=this, expression=exp.Null())) 5789 this = self.expression(exp.Not(this=this)) 5790 5791 if negate: 5792 this = self._negate_range(this) 5793 5794 if self._match(TokenType.IS): 5795 this = self._parse_is(this) 5796 5797 return this 5798 5799 def _negate_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5800 if not this: 5801 return this 5802 5803 expression = this.this if isinstance(this, exp.Escape) else this 5804 if isinstance(expression, (exp.Like, exp.ILike)): 5805 expression.set("negate", True) 5806 return this 5807 5808 return self.expression(exp.Not(this=this)) 5809 5810 def _parse_is(self, this: exp.Expr | None) -> exp.Expr | None: 5811 index = self._index - 1 5812 negate = self._match(TokenType.NOT) 5813 5814 if self._match_text_seq("DISTINCT", "FROM"): 5815 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5816 return self.expression(klass(this=this, expression=self._parse_bitwise())) 5817 5818 if 
def _parse_in(self, this: exp.Expr | None, alias: bool = False) -> exp.In:
    """Parse the right-hand side of an IN predicate.

    Three shapes are handled, tried in this order:

    * `IN UNNEST(...)`: stored under `unnest`;
    * `IN (...)` / `IN [...]`: a lone query is stored under `query` (as a
      subquery, after its modifiers are parsed), anything else under
      `expressions`;
    * `IN <column>`: stored under `field`.

    Args:
        this: Left-hand operand of the predicate.
        alias: Forwarded to `_parse_select_or_expression` for each list item.
    """
    unnest = self._parse_unnest(with_alias=False)
    if unnest:
        return self.expression(exp.In(this=this, unnest=unnest))

    if not self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
        return self.expression(exp.In(this=this, field=self._parse_column()))

    opened_with_paren = self._prev.token_type == TokenType.L_PAREN
    items = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

    sole_item = items[0] if len(items) == 1 else None
    if isinstance(sole_item, exp.Query):
        subquery = self._parse_query_modifiers(sole_item).subquery(copy=False)
        node = self.expression(exp.In(this=this, query=subquery))
    else:
        node = self.expression(exp.In(this=this, expressions=items))

    # The closing delimiter has to mirror the opening one.
    if opened_with_paren:
        self._match_r_paren(node)
    elif not self._match(TokenType.R_BRACKET, expression=node):
        self.raise_error("Expecting ]")

    return node
def _parse_escape(self, this: exp.Expr | None) -> exp.Expr | None:
    """Wrap ``this`` in ``exp.Escape`` when an ESCAPE clause follows.

    The escape value is parsed as a string, falling back to NULL. Without an
    ESCAPE token, ``this`` is returned unchanged.
    """
    if self._match(TokenType.ESCAPE):
        escape_value = self._parse_string() or self._parse_null()
        return self.expression(exp.Escape(this=this, expression=escape_value))

    return this
def _parse_interval(self, require_interval: bool = True) -> exp.Add | exp.Interval | None:
    """Parse an INTERVAL expression, or a sum of chained intervals.

    Args:
        require_interval: When true, the INTERVAL keyword must be present;
            otherwise ``None`` is returned. The keyword is consumed if
            present regardless of this flag.

    Returns:
        An ``exp.Interval``; an ``exp.Add`` when further string/number
        values follow (optionally separated by ``+``); or ``None`` after
        retreating to the starting position when no interval could be parsed.
    """
    start = self._index

    saw_keyword = self._match(TokenType.INTERVAL)
    if require_interval and not saw_keyword:
        return None

    if self._match(TokenType.STRING, advance=False):
        parse_value = self._parse_primary
    else:
        parse_value = self._parse_term
    value = parse_value()

    if not value:
        self._retreat(start)
        return None

    # An unqualified, unquoted column that is not followed by a valid unit is
    # not treated as an interval value: back out entirely.
    if (
        isinstance(value, exp.Column)
        and not value.table
        and not value.this.quoted
        and self._curr
        and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS
    ):
        self._retreat(start)
        return None

    interval = self._parse_interval_span(value)

    after_interval = self._index
    self._match(TokenType.PLUS)

    # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
    if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
        # Nothing to chain: give back the optional '+' consumed above.
        self._retreat(after_interval)
        return interval

    return self.expression(exp.Add(this=interval, expression=self._parse_interval(False)))
def _parse_factor(self) -> exp.Expr | None:
    """Parse a left-associative chain of ``FACTOR`` operators.

    Operands are parsed with ``_parse_exponent`` when the parser defines
    ``EXPONENT`` operators and with ``_parse_unary`` otherwise. The first
    operand may additionally carry an AT TIME ZONE suffix.
    """
    if self.EXPONENT:
        parse_operand = self._parse_exponent
    else:
        parse_operand = self._parse_unary

    left = self._parse_at_time_zone(parse_operand())

    while self._match_set(self.FACTOR):
        op_class = self.FACTOR[self._prev.token_type]
        op_comments = self._prev_comments
        right = parse_operand()

        # An integer-division operator spelled with letters and lacking a
        # right operand is un-consumed, and what was parsed so far is returned.
        dangling_word_operator = (
            not right and op_class is exp.IntDiv and self._prev.text.isalpha()
        )
        if dangling_word_operator:
            self._retreat(self._index - 1)
            return left

        left = self.expression(op_class(this=left, expression=right), comments=op_comments)

        if isinstance(left, exp.Div):
            left.set("typed", self.dialect.TYPED_DIVISION)
            left.set("safe", self.dialect.SAFE_DIVISION)

    return left
6082 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 6083 if isinstance(data_type, exp.Cast): 6084 # This constructor can contain ops directly after it, for instance struct unnesting: 6085 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 6086 return self._parse_column_ops(data_type) 6087 6088 if data_type: 6089 index2 = self._index 6090 this = self._parse_primary() 6091 6092 if isinstance(this, exp.Literal): 6093 literal = this.name 6094 this = self._parse_column_ops(this) 6095 6096 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 6097 if parser: 6098 return parser(self, this, data_type) 6099 6100 if ( 6101 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 6102 and data_type.is_type(exp.DType.TIMESTAMP) 6103 and TIME_ZONE_RE.search(literal) 6104 ): 6105 data_type = exp.DType.TIMESTAMPTZ.into_expr() 6106 6107 return self.expression(exp.Cast(this=this, to=data_type)) 6108 6109 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 6110 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 6111 # 6112 # If the index difference here is greater than 1, that means the parser itself must have 6113 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 6114 # 6115 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 6116 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 6117 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 6118 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 6119 # 6120 # In these cases, we don't really want to return the converted type, but instead retreat 6121 # and try to parse a Column or Identifier in the section below. 
def _parse_user_defined_type(self, identifier: exp.Identifier) -> exp.Expr | None:
    """Build a user-defined data type from a possibly dotted name.

    Starting from ``identifier``, every following ``.<token>`` is appended to
    the name, and the joined name is handed to ``exp.DataType.from_str`` with
    ``udt=True``.
    """
    name_parts = [identifier.name]

    while self._match(TokenType.DOT):
        # Same formatting as a plain f-string interpolation of the advance result.
        name_parts.append(f"{self._advance_any() and self._prev.text}")

    qualified_name = ".".join(name_parts)
    return exp.DataType.from_str(qualified_name, dialect=self.dialect, udt=True)
TokenType.PSEUDO_TYPE: 6188 return self.expression(exp.PseudoType(this=self._prev.text.upper())) 6189 6190 if type_token == TokenType.OBJECT_IDENTIFIER: 6191 return self.expression(exp.ObjectIdentifier(this=self._prev.text.upper())) 6192 6193 # https://materialize.com/docs/sql/types/map/ 6194 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 6195 key_type = self._parse_types( 6196 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6197 ) 6198 if not self._match(TokenType.FARROW): 6199 self._retreat(index) 6200 return None 6201 6202 value_type = self._parse_types( 6203 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6204 ) 6205 if not self._match(TokenType.R_BRACKET): 6206 self._retreat(index) 6207 return None 6208 6209 return exp.DataType( 6210 this=exp.DType.MAP, 6211 expressions=[key_type, value_type], 6212 nested=True, 6213 ) 6214 6215 nested = type_token in self.NESTED_TYPE_TOKENS 6216 is_struct = type_token in self.STRUCT_TYPE_TOKENS 6217 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 6218 expressions = None 6219 maybe_func = False 6220 6221 if self._match(TokenType.L_PAREN): 6222 if is_struct: 6223 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 6224 elif nested: 6225 expressions = self._parse_csv( 6226 lambda: self._parse_types( 6227 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6228 ) 6229 ) 6230 if type_token == TokenType.NULLABLE and len(expressions) == 1: 6231 this = expressions[0] 6232 this.set("nullable", True) 6233 self._match_r_paren() 6234 return this 6235 elif type_token in self.ENUM_TYPE_TOKENS: 6236 expressions = self._parse_csv(self._parse_equality) 6237 elif type_token == TokenType.JSON: 6238 # ClickHouse JSON type supports arguments: JSON(col Type, SKIP col, param=value) 6239 # https://clickhouse.com/docs/sql-reference/data-types/newjson 6240 expressions = self._parse_csv(self._parse_json_type_arg) 6241 
elif is_aggregate: 6242 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 6243 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 6244 ) 6245 if not func_or_ident: 6246 return None 6247 expressions = [func_or_ident] 6248 if self._match(TokenType.COMMA): 6249 expressions.extend( 6250 self._parse_csv( 6251 lambda: self._parse_types( 6252 check_func=check_func, 6253 schema=schema, 6254 allow_identifiers=allow_identifiers, 6255 ) 6256 ) 6257 ) 6258 else: 6259 expressions = self._parse_csv(self._parse_type_size) 6260 6261 # https://docs.snowflake.com/en/sql-reference/data-types-vector 6262 if type_token == TokenType.VECTOR and len(expressions) == 2: 6263 expressions = self._parse_vector_expressions(expressions) 6264 6265 if not self._match(TokenType.R_PAREN): 6266 self._retreat(index) 6267 return None 6268 6269 maybe_func = True 6270 6271 values: list[exp.Expr] | None = None 6272 6273 if nested and self._match(TokenType.LT): 6274 if is_struct: 6275 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 6276 else: 6277 expressions = self._parse_csv( 6278 lambda: self._parse_types( 6279 check_func=check_func, 6280 schema=schema, 6281 allow_identifiers=allow_identifiers, 6282 with_collation=True, 6283 ) 6284 ) 6285 6286 if not self._match(TokenType.GT): 6287 self.raise_error("Expecting >") 6288 6289 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 6290 values = self._parse_csv(self._parse_disjunction) 6291 if not values and is_struct: 6292 values = None 6293 self._retreat(self._index - 1) 6294 else: 6295 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 6296 6297 if type_token in self.TIMESTAMPS: 6298 if self._match_text_seq("WITH", "TIME", "ZONE"): 6299 maybe_func = False 6300 tz_type = exp.DType.TIMETZ if type_token in self.TIMES else exp.DType.TIMESTAMPTZ 6301 this = exp.DataType(this=tz_type, expressions=expressions) 6302 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 6303 
maybe_func = False 6304 this = exp.DataType(this=exp.DType.TIMESTAMPLTZ, expressions=expressions) 6305 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 6306 maybe_func = False 6307 elif type_token == TokenType.INTERVAL: 6308 if self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS: 6309 unit = self._parse_var(upper=True) 6310 if self._match_text_seq("TO"): 6311 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 6312 6313 this = self.expression(exp.DataType(this=self.expression(exp.Interval(unit=unit)))) 6314 else: 6315 this = self.expression(exp.DataType(this=exp.DType.INTERVAL)) 6316 elif type_token == TokenType.VOID: 6317 this = exp.DataType(this=exp.DType.NULL) 6318 6319 if maybe_func and check_func: 6320 index2 = self._index 6321 peek = self._parse_string() 6322 6323 if not peek: 6324 self._retreat(index) 6325 return None 6326 6327 self._retreat(index2) 6328 6329 if not this: 6330 assert type_token is not None 6331 if self._match_text_seq("UNSIGNED"): 6332 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 6333 if not unsigned_type_token: 6334 self.raise_error(f"Cannot convert {type_token.name} to unsigned.") 6335 6336 type_token = unsigned_type_token or type_token 6337 6338 # NULLABLE without parentheses can be a column (Presto/Trino) 6339 if type_token == TokenType.NULLABLE and not expressions: 6340 self._retreat(index) 6341 return None 6342 6343 this = exp.DataType( 6344 this=exp.DType[type_token.name], 6345 expressions=expressions, 6346 nested=nested, 6347 ) 6348 6349 # Empty arrays/structs are allowed 6350 if values is not None: 6351 cls = exp.Struct if is_struct else exp.Array 6352 this = exp.cast(cls(expressions=values), this, copy=False) 6353 6354 elif expressions: 6355 this.set("expressions", expressions) 6356 6357 # https://materialize.com/docs/sql/types/list/#type-name 6358 while self._match(TokenType.LIST): 6359 this = exp.DataType(this=exp.DType.LIST, expressions=[this], nested=True) 6360 
def _parse_json_type_arg(self) -> exp.Expr | None:
    """Parse a single argument to ClickHouse's JSON type.

    Returns an ``exp.SkipJSONColumn`` for ``SKIP ...`` arguments, an
    ``exp.EQ`` for ``name=value`` parameters, an ``exp.ColumnDef`` for
    ``column Type`` hints, or ``None`` when the argument does not start with
    a column.
    """
    # SKIP col or SKIP REGEXP 'pattern'
    if self._match_text_seq("SKIP"):
        is_regexp = self._match(TokenType.RLIKE)
        skipped = self._parse_column()
        if isinstance(skipped, exp.Column):
            skipped = skipped.to_dot()
        return self.expression(exp.SkipJSONColumn(regexp=is_regexp, expression=skipped))

    column = self._parse_column()
    if not isinstance(column, exp.Column):
        return None

    # Parameter: name=value (e.g., max_dynamic_paths=2)
    is_parameter = len(column.parts) == 1 and self._match(TokenType.EQ)
    if is_parameter:
        param_name = column.name
        param_value = self._parse_primary()
        return self.expression(exp.EQ(this=exp.var(param_name), expression=param_value))

    # Column type hint: col_name Type
    path = column.to_dot()
    type_hint = self._parse_types(check_func=False, allow_identifiers=False)
    return self.expression(exp.ColumnDef(this=path, kind=type_hint))
def _parse_at_time_zone(self, this: exp.Expr | None) -> exp.Expr | None:
    """Wrap ``this`` in one ``exp.AtTimeZone`` per trailing AT TIME ZONE clause.

    Each clause's zone is parsed with ``_parse_unary``; repeated clauses nest,
    with the earliest clause innermost. Without any clause, ``this`` is
    returned unchanged.
    """
    while self._match_text_seq("AT", "TIME", "ZONE"):
        this = self.expression(exp.AtTimeZone(this=this, zone=self._parse_unary()))

    return this
def _parse_column_parts_fast(self) -> exp.Column | exp.Dot | None:
    """Fast path for plain column / dotted references (a, a.b, ...).

    Identifier tokens separated by DOTs are consumed greedily. The fast path
    gives up (retreats to the start and returns None) when the first token
    names a no-paren function, when a DOT is not followed by an identifier
    token, or when the last identifier is followed by a column operator or
    postfix token. The first four parts form an exp.Column; each further part
    wraps the result in an exp.Dot.
    """
    start = self._index
    identifiers: list[exp.Identifier] = []
    gathered_comments: list[str] = []

    while self._match_set(self.IDENTIFIER_TOKENS):
        token = self._prev
        token_comments = self._prev_comments

        if not identifiers and token.text.upper() in self.NO_PAREN_FUNCTION_PARSERS:
            self._retreat(start)
            return None

        dotted = self._match(TokenType.DOT)
        following = self._curr.token_type

        if dotted:
            needs_slow_path = following not in self.IDENTIFIER_TOKENS
        else:
            needs_slow_path = (
                following in self.COLUMN_OPERATORS or following in self.COLUMN_POSTFIX_TOKENS
            )

        if needs_slow_path:
            self._retreat(start)
            return None

        if token_comments:
            # Comments are attached to the final node instead of each identifier.
            gathered_comments.extend(token_comments)
            self._prev_comments = []

        identifiers.append(
            self.expression(
                exp.Identifier(this=token.text, quoted=token.token_type == TokenType.IDENTIFIER),
                token,
            )
        )

        if not dotted:
            break

    if not identifiers:
        return None

    # The last of the first (up to) four parts is the column name; the ones
    # before it are, from nearest to farthest, table, db and catalog.
    qualifier_args = ("this", "table", "db", "catalog")
    column: exp.Column | exp.Dot = exp.Column(
        **dict(zip(qualifier_args, reversed(identifiers[:4])))
    )

    for extra_part in identifiers[4:]:
        column = exp.Dot(this=column, expression=extra_part)

    if gathered_comments:
        column.add_comments(gathered_comments)

    return column
def _parse_dcolon(self) -> exp.Expr | None:
    """Parse the operand of a cast column operator by delegating to ``_parse_types``."""
    cast_target = self._parse_types()
    return cast_target
def _parse_paren(self) -> exp.Expr | None:
    """Parse a parenthesised construct.

    Depending on the contents this yields an empty or multi-element
    ``exp.Tuple``, a subquery, or an ``exp.Paren``. A parenthesised aggregate
    function is additionally passed to ``_parse_window``.

    Returns:
        The parsed expression, or ``None`` when the current token is not ``(``.
    """
    if not self._match(TokenType.L_PAREN):
        return None

    paren_comments = self._prev_comments

    query = self._parse_select()
    contents = [query] if query else self._parse_expressions()
    first = seq_get(contents, 0)

    if not first and self._match(TokenType.R_PAREN, advance=False):
        # "()" with nothing inside
        result = self.expression(exp.Tuple())
    elif isinstance(first, exp.UNWRAPPED_QUERIES):
        result = self._parse_subquery(this=first, parse_alias=False)
    elif isinstance(first, (exp.Subquery, exp.Values)):
        with_set_ops = self._parse_set_operations(first)
        with_modifiers = self._parse_query_modifiers(with_set_ops)
        result = self._parse_subquery(this=with_modifiers, parse_alias=False)
    elif len(contents) > 1 or self._prev.token_type == TokenType.COMMA:
        # Several items, or a single item followed by a comma
        result = self.expression(exp.Tuple(expressions=contents))
    else:
        result = self.expression(exp.Paren(this=first))

    if result:
        result.add_comments(paren_comments)

    self._match_r_paren(expression=result)

    wraps_aggregate = isinstance(result, exp.Paren) and isinstance(result.this, exp.AggFunc)
    return self._parse_window(result) if wraps_aggregate else result
def _parse_function_args(self, alias: bool = False) -> list[exp.Expr]:
    """Parse a comma-separated argument list, parsing each item with `_parse_lambda(alias=alias)`."""

    def parse_argument() -> exp.Expr | None:
        return self._parse_lambda(alias=alias)

    return self._parse_csv(parse_argument)
token_type not in self.FUNC_TOKENS: 6895 return None 6896 6897 self._advance(2) 6898 6899 parser = self.FUNCTION_PARSERS.get(upper) 6900 if parser and not anonymous: 6901 result = parser(self) 6902 else: 6903 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 6904 6905 if subquery_predicate: 6906 expr = None 6907 if self._curr.token_type in self.SUBQUERY_TOKENS: 6908 expr = self._parse_select() 6909 self._match_r_paren() 6910 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 6911 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 6912 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 6913 self._advance(-1) 6914 expr = self._parse_bitwise() 6915 6916 if expr: 6917 return self.expression(subquery_predicate(this=expr), comments=comments) 6918 6919 if functions is None: 6920 functions = self.FUNCTIONS 6921 6922 function = functions.get(upper) 6923 known_function = function and not anonymous 6924 6925 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 6926 args = self._parse_function_args(alias) 6927 6928 post_func_comments = self._curr.comments if self._curr else None 6929 if known_function and post_func_comments: 6930 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 6931 # call we'll construct it as exp.Anonymous, even if it's "known" 6932 if any( 6933 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 6934 for comment in post_func_comments 6935 ): 6936 known_function = False 6937 6938 if alias and known_function: 6939 args = self._kv_to_prop_eq(args) 6940 6941 if known_function: 6942 func_builder = t.cast(t.Callable, function) 6943 6944 # mypyc compiled functions don't have __code__, so we use 6945 # try/except to check if func_builder accepts 'dialect'. 
def _kv_to_prop_eq(
    self, expressions: list[exp.Expr], parse_map: bool = False
) -> list[exp.Expr]:
    """
    Normalize key-value style arguments into `exp.PropertyEQ` nodes.

    Items that are instances of `self.KEY_VALUE_DEFINITIONS` are converted; every other
    item is passed to `self._to_prop_eq` together with its position in the list.

    Args:
        expressions: The parsed arguments to normalize.
        parse_map: If True, the key expression is kept as-is; otherwise it is replaced
            by an identifier built from the key's name.

    Returns:
        A new list containing the normalized items, in the original order.
    """
    normalized: list[exp.Expr] = []

    for position, item in enumerate(expressions):
        if not isinstance(item, self.KEY_VALUE_DEFINITIONS):
            normalized.append(self._to_prop_eq(item, position))
            continue

        if isinstance(item, exp.Alias):
            # `value AS key` becomes `key = value`
            item = self.expression(
                exp.PropertyEQ(this=item.args.get("alias"), expression=item.this)
            )

        if not isinstance(item, exp.PropertyEQ):
            key = item.this if parse_map else exp.to_identifier(item.this.name)
            item = self.expression(exp.PropertyEQ(this=key, expression=item.expression))

        key_node = item.this
        if isinstance(key_node, exp.Column):
            # Replace a Column key by the node it wraps
            key_node.replace(key_node.this)

        normalized.append(item)

    return normalized
def _parse_user_defined_function(self, kind: TokenType | None = None) -> exp.Expr | None:
    """
    Parse a user-defined function reference with an optional parameter list.

    The name is parsed with `_parse_table_parts(schema=True)`. If no opening parenthesis
    follows, the parsed name is returned unchanged; otherwise the parenthesized
    parameters are parsed and wrapped in an `exp.UserDefinedFunction`.

    Note: `kind` is not used by this implementation.
    """
    name = self._parse_table_parts(schema=True)

    if self._match(TokenType.L_PAREN):
        parameters = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        udf = exp.UserDefinedFunction(this=name, expressions=parameters, wrapped=True)
        return self.expression(udf)

    return name
def _parse_schema(self, this: exp.Expr | None = None) -> exp.Expr | None:
    """
    Parse a parenthesized list of constraints / field definitions into an `exp.Schema`.

    Returns `this` unchanged when no opening parenthesis follows, or when the
    parenthesis is followed by a token in `SELECT_START_TOKENS` (in which case the
    parser is rewound to where it started).
    """
    start = self._index

    if not self._match(TokenType.L_PAREN):
        return this

    # INSERT INTO table (<expr>) is ambiguous: <expr> may be a schema or a subquery/CTE.
    # If a query start follows the parenthesis, rewind and let the caller handle it.
    if self._match_set(self.SELECT_START_TOKENS):
        self._retreat(start)
        return this

    def parse_entry() -> exp.Expr | None:
        return self._parse_constraint() or self._parse_field_def()

    entries = self._parse_csv(parse_entry)
    self._match_r_paren()
    return self.expression(exp.Schema(this=this, expressions=entries))
or self._match_texts( 7126 ("ALIAS", "MATERIALIZED") 7127 ): 7128 persisted = self._prev.text.upper() == "MATERIALIZED" 7129 constraint_kind = exp.ComputedColumnConstraint( 7130 this=self._parse_disjunction(), 7131 persisted=persisted or self._match_text_seq("PERSISTED"), 7132 data_type=exp.Var(this="AUTO") 7133 if self._match_text_seq("AUTO") 7134 else self._parse_types(), 7135 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 7136 ) 7137 constraints.append(self.expression(exp.ColumnConstraint(kind=constraint_kind))) 7138 elif not kind and self._match_set({TokenType.IN, TokenType.OUT}, advance=False): 7139 in_out_constraint = self.expression( 7140 exp.InOutColumnConstraint( 7141 input_=self._match(TokenType.IN), output=self._match(TokenType.OUT) 7142 ) 7143 ) 7144 constraints.append(in_out_constraint) 7145 kind = self._parse_types() 7146 elif ( 7147 kind 7148 and self._match(TokenType.ALIAS, advance=False) 7149 and ( 7150 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 7151 or self._next.token_type == TokenType.L_PAREN 7152 ) 7153 ): 7154 self._advance() 7155 constraints.append( 7156 self.expression( 7157 exp.ColumnConstraint( 7158 kind=exp.ComputedColumnConstraint( 7159 this=self._parse_disjunction(), 7160 persisted=self._match_texts(("STORED", "VIRTUAL")) 7161 and self._prev.text.upper() == "STORED", 7162 ) 7163 ) 7164 ) 7165 ) 7166 7167 while True: 7168 constraint = self._parse_column_constraint() 7169 if not constraint: 7170 break 7171 constraints.append(constraint) 7172 7173 if not kind and not constraints: 7174 return this 7175 7176 position = None 7177 if self._match_texts(("FIRST", "AFTER")): 7178 pos = self._prev.text 7179 position = self.expression(exp.ColumnPosition(this=self._parse_column(), position=pos)) 7180 7181 return self.expression( 7182 exp.ColumnDef(this=this, kind=kind, constraints=constraints, position=position) 7183 ) 7184 7185 def _parse_auto_increment( 7186 self, 7187 ) -> exp.GeneratedAsIdentityColumnConstraint | 
def _parse_compress(self) -> exp.CompressColumnConstraint:
    """
    Parse the value of a COMPRESS column constraint.

    When an opening parenthesis follows, a wrapped comma-separated list is parsed;
    otherwise a single bitwise-level expression is parsed.
    """
    if self._match(TokenType.L_PAREN, advance=False):
        value = self._parse_wrapped_csv(self._parse_bitwise)
    else:
        value = self._parse_bitwise()

    return self.expression(exp.CompressColumnConstraint(this=value))
TokenType.NULL) 7246 this = self.expression( 7247 exp.GeneratedAsIdentityColumnConstraint(this=False, on_null=on_null) 7248 ) 7249 else: 7250 self._match_text_seq("ALWAYS") 7251 this = self.expression(exp.GeneratedAsIdentityColumnConstraint(this=True)) 7252 7253 self._match(TokenType.ALIAS) 7254 7255 if self._match_text_seq("ROW"): 7256 start = self._match_text_seq("START") 7257 if not start: 7258 self._match(TokenType.END) 7259 hidden = self._match_text_seq("HIDDEN") 7260 return self.expression(exp.GeneratedAsRowColumnConstraint(start=start, hidden=hidden)) 7261 7262 identity = self._match_text_seq("IDENTITY") 7263 7264 if self._match(TokenType.L_PAREN): 7265 if self._match(TokenType.START_WITH): 7266 this.set("start", self._parse_bitwise()) 7267 if self._match_text_seq("INCREMENT", "BY"): 7268 this.set("increment", self._parse_bitwise()) 7269 if self._match_text_seq("MINVALUE"): 7270 this.set("minvalue", self._parse_bitwise()) 7271 if self._match_text_seq("MAXVALUE"): 7272 this.set("maxvalue", self._parse_bitwise()) 7273 7274 if self._match_text_seq("CYCLE"): 7275 this.set("cycle", True) 7276 elif self._match_text_seq("NO", "CYCLE"): 7277 this.set("cycle", False) 7278 7279 if not identity: 7280 this.set("expression", self._parse_range()) 7281 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 7282 args = self._parse_csv(self._parse_bitwise) 7283 this.set("start", seq_get(args, 0)) 7284 this.set("increment", seq_get(args, 1)) 7285 7286 self._match_r_paren() 7287 7288 return this 7289 7290 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 7291 self._match_text_seq("LENGTH") 7292 return self.expression(exp.InlineLengthColumnConstraint(this=self._parse_bitwise())) 7293 7294 def _parse_not_constraint(self) -> exp.Expr | None: 7295 if self._match_text_seq("NULL"): 7296 return self.expression(exp.NotNullColumnConstraint()) 7297 if self._match_text_seq("CASESPECIFIC"): 7298 return 
def _parse_column_constraint(self) -> exp.Expr | None:
    """
    Parse a single, optionally named, column constraint.

    Returns:
        An `exp.ColumnConstraint` when a constraint keyword is matched and its parser
        yields a result; None when the matched parser yields nothing (the keyword is
        un-consumed); otherwise the constraint name parsed after CONSTRAINT, if any.
    """
    name = None
    if self._match(TokenType.CONSTRAINT):
        name = self._parse_id_var()

    # WITH followed by a procedure option is not treated as a column constraint
    procedure_option_follows = (
        self._match(TokenType.WITH, advance=False)
        and self._next
        and self._next.text.upper() in self.PROCEDURE_OPTIONS
    )

    if procedure_option_follows or not self._match_texts(self.CONSTRAINT_PARSERS):
        return name

    keyword = self._prev.text.upper()
    kind = self.CONSTRAINT_PARSERS[keyword](self)
    if kind:
        return self.expression(exp.ColumnConstraint(this=name, kind=kind))

    # The constraint parser declined: step back over the keyword
    self._retreat(self._index - 1)
    return None
def _parse_key_constraint_options(self) -> list[str]:
    """
    Collect key constraint options as strings.

    `ON <token> <action>` clauses are rendered as "ON {token} {action}"; any other
    option must be matched by `KEY_CONSTRAINT_OPTIONS`, and parsing stops at the first
    token that is not.
    """
    collected: list[str] = []

    while self._curr:
        if not self._match(TokenType.ON):
            option = self._parse_var_from_options(
                self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
            )
            if not option:
                break
            collected.append(option.name)
            continue

        # The token right after ON (whatever it is) names the triggering event
        event = self._advance_any() and self._prev.text
        action = None

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match_text_seq("CASCADE"):
            action = "CASCADE"
        elif self._match_text_seq("RESTRICT"):
            action = "RESTRICT"
        elif self._match_pair(TokenType.SET, TokenType.NULL):
            action = "SET NULL"
        elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
            action = "SET DEFAULT"
        else:
            self.raise_error("Invalid key constraint")

        collected.append(f"ON {event} {action}")

    return collected
def _parse_foreign_key(self) -> exp.ForeignKey:
    """
    Parse the body of a FOREIGN KEY constraint.

    The wrapped column list is parsed unless REFERENCES is the next token, followed by
    the reference itself and any number of `ON {DELETE | UPDATE} <action>` clauses.
    Each action is stored under the lower-cased text of the matched DELETE/UPDATE
    token and forwarded to `exp.ForeignKey` as a keyword argument.

    Errors are reported through `self.raise_error` when ON is not followed by
    DELETE or UPDATE, or when SET is not followed by NULL or DEFAULT.
    """
    expressions = (
        self._parse_wrapped_id_vars()
        if not self._match(TokenType.REFERENCES, advance=False)
        else None
    )
    reference = self._parse_references()
    on_options = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")

        kind = self._prev.text.lower()

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match(TokenType.SET):
            # Without this check an unexpected token after SET silently produced
            # the bogus action "SET SET" and was left unconsumed.
            if not self._match_set((TokenType.NULL, TokenType.DEFAULT)):
                self.raise_error("Expected NULL or DEFAULT after SET")
            action = "SET " + self._prev.text.upper()
        else:
            # Any other single token (e.g. CASCADE) is taken verbatim as the action
            self._advance()
            action = self._prev.text.upper()

        on_options[kind] = action

    return self.expression(
        exp.ForeignKey(
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,
        )
    )
def _parse_odbc_datetime_literal(self) -> exp.Expr:
    """
    Parse an ODBC-style datetime literal such as `{d'yyyy-mm-dd'}`.

    The VAR token after the opening brace selects the expression class through
    `ODBC_DATETIME_LITERALS`, so `{d'yyyy-mm-dd'}` yields the same kind of node as
    `DATE('yyyy-mm-dd')`. An error is reported if the closing brace is missing.

    Reference:
    https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
    """
    self._match(TokenType.VAR)
    literal_kind = self._prev.text.lower()
    node_type = self.ODBC_DATETIME_LITERALS[literal_kind]
    value = self._parse_string()
    node = self.expression(node_type(this=value))

    if self._match(TokenType.R_BRACE):
        return node

    self.raise_error("Expected }")
    return node
def _parse_slice(self, this: exp.Expr | None) -> exp.Expr | None:
    """
    Parse the remainder of a slice once its start expression `this` is known.

    Returns `this` unchanged when no colon follows. Otherwise an `exp.Slice` is built
    from `this`, the end expression and an optional step introduced by a second colon.
    """
    if not self._match(TokenType.COLON):
        return this

    upper: exp.Expr | None
    if self._match_pair(TokenType.DASH, TokenType.COLON, advance=False):
        # A dash immediately followed by a colon: consume the dash, end becomes -1
        self._advance()
        upper = -exp.Literal.number("1")
    else:
        upper = self._parse_assignment()

    stride = None
    if self._match(TokenType.COLON):
        stride = self._parse_unary()

    return self.expression(exp.Slice(this=this, expression=upper, step=stride))
def _parse_extract(self) -> exp.Extract | exp.Anonymous:
    """
    Parse the arguments of EXTRACT: a part followed by FROM or a comma and a source.

    The part is parsed as a function call or, failing that, as an upper-cased variable
    or string. An error is reported when neither FROM nor a comma follows it.
    """
    part = self._parse_function() or self._parse_var_or_string(upper=True)

    # Both separators lead to the same node, so they share a single construction
    if not (self._match(TokenType.FROM) or self._match(TokenType.COMMA)):
        self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

    source = self._parse_bitwise()
    return self.expression(exp.Extract(this=part, expression=source))
self.expression( 7701 exp.Chr( 7702 expressions=self._parse_csv(self._parse_assignment), 7703 charset=self._match(TokenType.USING) and self._parse_charset_name(), 7704 ) 7705 ) 7706 7707 def _parse_charset_name(self) -> exp.Expr | None: 7708 """ 7709 Parse a charset name after USING or CHARACTER SET. Dialects that need to preserve quoting 7710 for specific name shapes override this. 7711 """ 7712 return self._parse_var( 7713 tokens={TokenType.BINARY, TokenType.IDENTIFIER}, 7714 ) 7715 7716 def _parse_cast(self, strict: bool, safe: bool | None = None) -> exp.Expr: 7717 this = self._parse_assignment() 7718 7719 if not self._match(TokenType.ALIAS): 7720 if self._match(TokenType.COMMA): 7721 return self.expression(exp.CastToStrType(this=this, to=self._parse_string())) 7722 7723 self.raise_error("Expected AS after CAST") 7724 7725 fmt = None 7726 to = self._parse_types(with_collation=True) 7727 7728 default = None 7729 if self._match(TokenType.DEFAULT): 7730 default = self._parse_bitwise() 7731 self._match_text_seq("ON", "CONVERSION", "ERROR") 7732 7733 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 7734 fmt_string = self._parse_wrapped(self._parse_string, optional=True) 7735 fmt = self._parse_at_time_zone(fmt_string) 7736 7737 if not to: 7738 to = exp.DType.UNKNOWN.into_expr() 7739 if to.this in exp.DataType.TEMPORAL_TYPES: 7740 this = self.expression( 7741 (exp.StrToDate if to.this == exp.DType.DATE else exp.StrToTime)( 7742 this=this, 7743 format=exp.Literal.string( 7744 format_time( 7745 fmt_string.this if fmt_string else "", 7746 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 7747 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 7748 ) 7749 ), 7750 safe=safe, 7751 ) 7752 ) 7753 7754 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 7755 this.set("zone", fmt.args["zone"]) 7756 return this 7757 elif not to: 7758 self.raise_error("Expected TYPE after CAST") 7759 elif isinstance(to, exp.Identifier): 7760 to = 
exp.DataType.from_str(to.name, dialect=self.dialect, udt=True) 7761 elif to.this == exp.DType.CHAR and self._match(TokenType.CHARACTER_SET): 7762 to = exp.DType.CHARACTER_SET.into_expr(kind=self._parse_var_or_string()) 7763 7764 return self.build_cast( 7765 strict=strict, 7766 this=this, 7767 to=to, 7768 format=fmt, 7769 safe=safe, 7770 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 7771 default=default, 7772 ) 7773 7774 def _parse_string_agg(self) -> exp.GroupConcat: 7775 if self._match(TokenType.DISTINCT): 7776 args: list[exp.Expr | None] = [ 7777 self.expression(exp.Distinct(expressions=[self._parse_disjunction()])) 7778 ] 7779 if self._match(TokenType.COMMA): 7780 args.extend(self._parse_csv(self._parse_disjunction)) 7781 else: 7782 args = self._parse_csv(self._parse_disjunction) # type: ignore 7783 7784 if self._match_text_seq("ON", "OVERFLOW"): 7785 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 7786 if self._match_text_seq("ERROR"): 7787 on_overflow: exp.Expr | None = exp.var("ERROR") 7788 else: 7789 self._match_text_seq("TRUNCATE") 7790 on_overflow = self.expression( 7791 exp.OverflowTruncateBehavior( 7792 this=self._parse_string(), 7793 with_count=( 7794 self._match_text_seq("WITH", "COUNT") 7795 or not self._match_text_seq("WITHOUT", "COUNT") 7796 ), 7797 ) 7798 ) 7799 else: 7800 on_overflow = None 7801 7802 index = self._index 7803 if not self._match(TokenType.R_PAREN) and args: 7804 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 7805 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
def _parse_convert(self, strict: bool, safe: bool | None = None) -> exp.Expr | None:
    """
    Parse `<expr> USING <charset>` or `<expr>, <type>` and delegate to `build_cast`.

    When neither USING nor a comma follows the operand, the target passed to
    `build_cast` is None. `strict` and `safe` are forwarded unchanged.
    """
    operand = self._parse_bitwise()

    target: exp.Expr | None = None
    if self._match(TokenType.USING):
        target = exp.DType.CHARACTER_SET.into_expr(kind=self._parse_charset_name())
    elif self._match(TokenType.COMMA):
        target = self._parse_types()

    return self.build_cast(strict=strict, this=operand, to=target, safe=safe)
def _parse_xml_namespace(self) -> list[exp.XMLNamespace]:
    """
    Parse a comma-separated list of XML namespace entries.

    Each entry is either `DEFAULT <string>` or a string that may carry an alias.
    At least one entry is always parsed, and every entry is wrapped in an
    XMLNamespace node.
    """
    parsed = []
    has_more = True

    while has_more:
        if self._match(TokenType.DEFAULT):
            uri = self._parse_string()
        else:
            uri = self._parse_alias(self._parse_string())

        parsed.append(self.expression(exp.XMLNamespace(this=uri)))
        # Keep going only while entries are separated by commas.
        has_more = self._match(TokenType.COMMA)

    return parsed
self._match_text_seq("FORMAT", "JSON"): 7919 return this 7920 7921 return self.expression(exp.FormatJson(this=this)) 7922 7923 def _parse_on_condition(self) -> exp.OnCondition | None: 7924 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 7925 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 7926 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 7927 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 7928 else: 7929 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 7930 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 7931 7932 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 7933 7934 if not empty and not error and not null: 7935 return None 7936 7937 return self.expression(exp.OnCondition(empty=empty, error=error, null=null)) 7938 7939 def _parse_on_handling(self, on: str, *values: str) -> str | None | exp.Expr | None: 7940 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 7941 for value in values: 7942 if self._match_text_seq(value, "ON", on): 7943 return f"{value} ON {on}" 7944 7945 index = self._index 7946 if self._match(TokenType.DEFAULT): 7947 default_value = self._parse_bitwise() 7948 if self._match_text_seq("ON", on): 7949 return default_value 7950 7951 self._retreat(index) 7952 7953 return None 7954 7955 @t.overload 7956 def _parse_json_object(self, agg: t.Literal[False]) -> exp.JSONObject: ... 7957 7958 @t.overload 7959 def _parse_json_object(self, agg: t.Literal[True]) -> exp.JSONObjectAgg: ... 
# Note: this is currently incomplete; it only implements the "JSON_value_column" part
def _parse_json_column_def(self) -> exp.JSONColumnDef:
    """
    Parse one column definition inside a JSON schema.

    A definition that starts with NESTED carries no name, ordinality or type of its
    own; after the optional FORMAT JSON and PATH parts its nested schema is parsed.
    Any other definition is `<name> [FOR ORDINALITY] [<type>]`, followed by the same
    optional FORMAT JSON and PATH parts.
    """
    name = for_ordinality = dtype = is_nested = None

    if self._match_text_seq("NESTED"):
        is_nested = True
    else:
        name = self._parse_id_var()
        for_ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY)
        dtype = self._parse_types(allow_identifiers=False)

    has_format_json = self._match_text_seq("FORMAT", "JSON")
    json_path = self._match_text_seq("PATH") and self._parse_string()
    # Only NESTED definitions have a sub-schema to parse.
    sub_schema = is_nested and self._parse_json_schema()

    return self.expression(
        exp.JSONColumnDef(
            this=name,
            kind=dtype,
            path=json_path,
            nested_schema=sub_schema,
            ordinality=for_ordinality,
            format_json=has_format_json,
        )
    )
def _parse_match_against(self) -> exp.MatchAgainst:
    """
    Parse `MATCH (<targets>) AGAINST (<string> [<modifier>])`.

    The targets are either `TABLE <table>` or a comma-separated list of columns.
    The recognised modifiers are IN NATURAL LANGUAGE MODE (optionally followed by
    WITH QUERY EXPANSION), IN BOOLEAN MODE, and WITH QUERY EXPANSION on its own.
    """
    if self._match_text_seq("TABLE"):
        # parse SingleStore MATCH(TABLE ...) syntax
        # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/
        table = self._parse_table()
        targets = [table] if table else []
    else:
        targets = self._parse_csv(self._parse_column)

    self._match_text_seq(")", "AGAINST", "(")

    search = self._parse_string()

    modifier = None
    if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
        modifier = "IN NATURAL LANGUAGE MODE"
        if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = f"{modifier} WITH QUERY EXPANSION"
    elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
        modifier = "IN BOOLEAN MODE"
    elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
        modifier = "WITH QUERY EXPANSION"

    return self.expression(
        exp.MatchAgainst(this=search, expressions=targets, modifier=modifier)
    )
def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """
    Parse the arguments of a POSITION-like call into a StrPosition node.

    Two shapes are handled: `<needle> IN <haystack>`, and a comma-separated argument
    list whose third item, if present, becomes the start position.

    Args:
        haystack_first: For the comma form, whether the first argument is the string
            being searched (True) or the substring being searched for (False).
    """
    operands = self._parse_csv(self._parse_bitwise)

    if self._match(TokenType.IN):
        return self.expression(
            exp.StrPosition(this=self._parse_bitwise(), substr=seq_get(operands, 0))
        )

    haystack_at, needle_at = (0, 1) if haystack_first else (1, 0)
    haystack = seq_get(operands, haystack_at)
    needle = seq_get(operands, needle_at)

    return self.expression(
        exp.StrPosition(this=haystack, substr=needle, position=seq_get(operands, 2))
    )
def _parse_trim(self) -> exp.Trim:
    """
    Parse the arguments of a TRIM call.

    The shape is `[<trim type>] <expr> [FROM <expr> | , <expr>] [COLLATE <expr>]`.
    The two operands are swapped when they are separated by FROM, or when the
    parser's TRIM_PATTERN_FIRST setting is truthy.
    """
    # https://www.w3resource.com/sql/character-functions/trim.php
    # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

    position = self._prev.text.upper() if self._match_texts(self.TRIM_TYPES) else None

    target = self._parse_bitwise()
    pattern = None
    if self._match_set((TokenType.FROM, TokenType.COMMA)):
        # Decide on the swap before parsing further, while _prev is still the separator.
        swap = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
        pattern = self._parse_bitwise()

        if swap:
            target, pattern = pattern, target

    collation = self._parse_bitwise() if self._match(TokenType.COLLATE) else None

    return self.expression(
        exp.Trim(this=target, position=position, expression=pattern, collation=collation)
    )
def _parse_window(self, this: exp.Expr | None, alias: bool = False) -> exp.Expr | None:
    """
    Parse the modifiers that can follow a function call, up to and including a window.

    In order, this wraps `this` in WithinGroup, Filter and IgnoreNulls / RespectNulls
    nodes when the corresponding syntax is present, and then in a Window node when a
    window introducer (or, for named windows, an optional alias token) follows.

    Args:
        this: The expression the modifiers apply to.
        alias: When True, a named-window definition is parsed: no introducer token is
            required, an optional ALIAS token is consumed, and `over` is left as None.

    Returns:
        `this` (possibly wrapped) when no window introducer follows, otherwise the
        Window node. If another introducer follows the closing parenthesis, the
        Window itself is parsed again as the operand of a further window.
    """
    func = this
    comments = func.comments if isinstance(func, exp.Expr) else None

    # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
    if self._match_text_seq("WITHIN", "GROUP"):
        order = self._parse_wrapped(self._parse_order)
        this = self.expression(exp.WithinGroup(this=this, expression=order))

    if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
        # The WHERE keyword is consumed here, so _parse_where is told to skip it.
        self._match(TokenType.WHERE)
        this = self.expression(
            exp.Filter(this=this, expression=self._parse_where(skip_where_token=True))
        )
        self._match_r_paren()

    # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
    # Some dialects choose to implement and some do not.
    # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

    # There is some code above in _parse_lambda that handles
    # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

    # The below changes handle
    # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

    # Oracle allows both formats
    # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
    # and Snowflake chose to do the same for familiarity
    # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
    if isinstance(this, exp.AggFunc):
        ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

        # Hoist a nested IGNORE/RESPECT NULLS node so that it wraps the aggregate instead.
        if ignore_respect and ignore_respect is not this:
            ignore_respect.replace(ignore_respect.this)
            this = self.expression(ignore_respect.__class__(this=this))

    this = self._parse_respect_or_ignore_nulls(this)

    # bigquery select from window x AS (partition by ...)
    if alias:
        over = None
        self._match(TokenType.ALIAS)
    elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
        return this
    else:
        # Remember which introducer keyword was used.
        over = self._prev.text.upper()

    # The comments captured at the top are attached to the Window node below, so
    # they are removed from the original function.
    if comments and isinstance(func, exp.Expr):
        func.pop_comments()

    if not self._match(TokenType.L_PAREN):
        # No parenthesised specification: the window is referenced by name.
        return self.expression(
            exp.Window(this=this, alias=self._parse_id_var(False), over=over), comments=comments
        )

    window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

    # `first` is True for FIRST, False for LAST and None when neither is present.
    first: bool | None = True if self._match(TokenType.FIRST) else None
    if self._match_text_seq("LAST"):
        first = False

    partition, order = self._parse_partition_and_order()
    # `kind` is the text of the matched frame keyword, or a falsy value when there is none.
    kind = (
        self._match_set((TokenType.ROWS, TokenType.RANGE)) or self._match_text_seq("GROUPS")
    ) and self._prev.text

    if kind:
        self._match(TokenType.BETWEEN)
        start = self._parse_window_spec()

        # Without AND there is no end bound; the empty dict makes the .get() calls yield None.
        end = self._parse_window_spec() if self._match(TokenType.AND) else {}
        exclude = (
            self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
            if self._match_text_seq("EXCLUDE")
            else None
        )

        spec = self.expression(
            exp.WindowSpec(
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end.get("value"),
                end_side=end.get("side"),
                exclude=exclude,
            )
        )
    else:
        spec = None

    self._match_r_paren()

    window = self.expression(
        exp.Window(
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        ),
        comments=comments,
    )

    # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
    if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
        return self._parse_window(window, alias=alias)

    return window
8334 if self._can_parse_named_window(): 8335 return this 8336 8337 any_token = self._match(TokenType.ALIAS) 8338 comments = self._prev_comments 8339 8340 if explicit and not any_token: 8341 return this 8342 8343 if self._match(TokenType.L_PAREN): 8344 aliases = self.expression( 8345 exp.Aliases( 8346 this=this, expressions=self._parse_csv(lambda: self._parse_id_var(any_token)) 8347 ), 8348 comments=comments, 8349 ) 8350 self._match_r_paren(aliases) 8351 return aliases 8352 8353 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 8354 self.STRING_ALIASES and self._parse_string_as_identifier() 8355 ) 8356 8357 if alias: 8358 comments.extend(alias.pop_comments()) 8359 this = self.expression(exp.Alias(this=this, alias=alias), comments=comments) 8360 column = this.this 8361 8362 # Moves the comment next to the alias in `expr /* comment */ AS alias` 8363 if not this.comments and column and column.comments: 8364 this.comments = column.pop_comments() 8365 8366 return this 8367 8368 def _parse_id_var( 8369 self, 8370 any_token: bool = True, 8371 tokens: t.Collection[TokenType] | None = None, 8372 ) -> exp.Expr | None: 8373 expression = self._parse_identifier() 8374 if not expression and ( 8375 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 8376 ): 8377 quoted = self._prev.token_type == TokenType.STRING 8378 expression = self._identifier_expression(quoted=quoted) 8379 8380 return expression 8381 8382 def _parse_string(self) -> exp.Expr | None: 8383 if self._match_set(self.STRING_PARSERS): 8384 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 8385 return self._parse_placeholder() 8386 8387 def _parse_string_as_identifier(self) -> exp.Identifier | None: 8388 if not self._match(TokenType.STRING): 8389 return None 8390 output = exp.to_identifier(self._prev.text, quoted=True) 8391 output.update_positions(self._prev) 8392 return output 8393 8394 def _parse_number(self) -> exp.Expr | None: 8395 if 
self._match_set(self.NUMERIC_PARSERS): 8396 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 8397 return self._parse_placeholder() 8398 8399 def _parse_identifier(self) -> exp.Expr | None: 8400 if self._match(TokenType.IDENTIFIER): 8401 return self._identifier_expression(quoted=True) 8402 return self._parse_placeholder() 8403 8404 def _parse_var( 8405 self, 8406 any_token: bool = False, 8407 tokens: t.Collection[TokenType] | None = None, 8408 upper: bool = False, 8409 ) -> exp.Expr | None: 8410 if ( 8411 (any_token and self._advance_any()) 8412 or self._match(TokenType.VAR) 8413 or (self._match_set(tokens) if tokens else False) 8414 ): 8415 return self.expression( 8416 exp.Var(this=self._prev.text.upper() if upper else self._prev.text) 8417 ) 8418 return self._parse_placeholder() 8419 8420 def _advance_any(self, ignore_reserved: bool = False) -> Token | None: 8421 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 8422 self._advance() 8423 return self._prev 8424 return None 8425 8426 def _parse_var_or_string(self, upper: bool = False) -> exp.Expr | None: 8427 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 8428 8429 def _parse_primary_or_var(self) -> exp.Expr | None: 8430 return self._parse_primary() or self._parse_var(any_token=True) 8431 8432 def _parse_null(self) -> exp.Expr | None: 8433 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 8434 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 8435 return self._parse_placeholder() 8436 8437 def _parse_boolean(self) -> exp.Expr | None: 8438 if self._match(TokenType.TRUE): 8439 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 8440 if self._match(TokenType.FALSE): 8441 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 8442 return self._parse_placeholder() 8443 8444 def _parse_star(self) -> exp.Expr | None: 8445 if self._match(TokenType.STAR): 8446 return 
self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 8447 return self._parse_placeholder() 8448 8449 def _parse_parameter(self) -> exp.Parameter: 8450 this = self._parse_identifier() or self._parse_primary_or_var() 8451 return self.expression(exp.Parameter(this=this)) 8452 8453 def _parse_placeholder(self) -> exp.Expr | None: 8454 if self._match_set(self.PLACEHOLDER_PARSERS): 8455 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 8456 if placeholder: 8457 return placeholder 8458 self._advance(-1) 8459 return None 8460 8461 def _parse_star_op(self, *keywords: str) -> list[exp.Expr] | None: 8462 if not self._match_texts(keywords): 8463 return None 8464 if self._match(TokenType.L_PAREN, advance=False): 8465 return self._parse_wrapped_csv(self._parse_expression) 8466 8467 expression = self._parse_alias(self._parse_disjunction(), explicit=True) 8468 return [expression] if expression else None 8469 8470 def _parse_csv( 8471 self, parse_method: t.Callable[[], T | None], sep: TokenType = TokenType.COMMA 8472 ) -> list[T]: 8473 parse_result = parse_method() 8474 items = [parse_result] if parse_result is not None else [] 8475 8476 while self._match(sep): 8477 if isinstance(parse_result, exp.Expr): 8478 self._add_comments(parse_result) 8479 parse_result = parse_method() 8480 if parse_result is not None: 8481 items.append(parse_result) 8482 8483 return items 8484 8485 def _parse_wrapped_id_vars(self, optional: bool = False) -> list[exp.Expr]: 8486 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 8487 8488 def _parse_wrapped_csv( 8489 self, 8490 parse_method: t.Callable[[], T | None], 8491 sep: TokenType = TokenType.COMMA, 8492 optional: bool = False, 8493 ) -> list[T]: 8494 return self._parse_wrapped( 8495 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 8496 ) 8497 8498 def _parse_wrapped(self, parse_method: t.Callable[[], T], optional: bool = False) -> T: 8499 wrapped = self._match(TokenType.L_PAREN) 8500 if not 
wrapped and not optional: 8501 self.raise_error("Expecting (") 8502 parse_result = parse_method() 8503 if wrapped: 8504 self._match_r_paren() 8505 return parse_result 8506 8507 def _parse_expressions(self) -> list[exp.Expr]: 8508 return self._parse_csv(self._parse_expression) 8509 8510 def _parse_select_or_expression(self, alias: bool = False) -> exp.Expr | None: 8511 return ( 8512 self._parse_set_operations( 8513 self._parse_alias(self._parse_assignment(), explicit=True) 8514 if alias 8515 else self._parse_assignment() 8516 ) 8517 or self._parse_select() 8518 ) 8519 8520 def _parse_ddl_select(self) -> exp.Expr | None: 8521 return self._parse_query_modifiers( 8522 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 8523 ) 8524 8525 def _parse_transaction(self) -> exp.Transaction | exp.Command: 8526 this = None 8527 if self._match_texts(self.TRANSACTION_KIND): 8528 this = self._prev.text 8529 8530 self._match_texts(("TRANSACTION", "WORK")) 8531 8532 modes = [] 8533 while True: 8534 mode = [] 8535 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 8536 mode.append(self._prev.text) 8537 8538 if mode: 8539 modes.append(" ".join(mode)) 8540 if not self._match(TokenType.COMMA): 8541 break 8542 8543 return self.expression(exp.Transaction(this=this, modes=modes)) 8544 8545 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 8546 chain = None 8547 savepoint = None 8548 is_rollback = self._prev.token_type == TokenType.ROLLBACK 8549 8550 self._match_texts(("TRANSACTION", "WORK")) 8551 8552 if self._match_text_seq("TO"): 8553 self._match_text_seq("SAVEPOINT") 8554 savepoint = self._parse_id_var() 8555 8556 if self._match(TokenType.AND): 8557 chain = not self._match_text_seq("NO") 8558 self._match_text_seq("CHAIN") 8559 8560 if is_rollback: 8561 return self.expression(exp.Rollback(savepoint=savepoint)) 8562 8563 return self.expression(exp.Commit(chain=chain)) 8564 8565 def _parse_refresh(self) -> exp.Refresh | 
exp.Command: 8566 if self._match(TokenType.TABLE): 8567 kind = "TABLE" 8568 elif self._match_text_seq("MATERIALIZED", "VIEW"): 8569 kind = "MATERIALIZED VIEW" 8570 else: 8571 kind = "" 8572 8573 this = self._parse_string() or self._parse_table() 8574 if not kind and not isinstance(this, exp.Literal): 8575 return self._parse_as_command(self._prev) 8576 8577 return self.expression(exp.Refresh(this=this, kind=kind)) 8578 8579 def _parse_column_def_with_exists(self): 8580 start = self._index 8581 self._match(TokenType.COLUMN) 8582 8583 exists_column = self._parse_exists(not_=True) 8584 expression = self._parse_field_def() 8585 8586 if not isinstance(expression, exp.ColumnDef): 8587 self._retreat(start) 8588 return None 8589 8590 expression.set("exists", exists_column) 8591 8592 return expression 8593 8594 def _parse_add_column(self) -> exp.ColumnDef | None: 8595 if not self._prev.text.upper() == "ADD": 8596 return None 8597 8598 return self._parse_column_def_with_exists() 8599 8600 def _parse_drop_column(self) -> exp.Drop | exp.Command | None: 8601 drop = self._parse_drop() if self._match(TokenType.DROP) else None 8602 if drop and not isinstance(drop, exp.Command): 8603 drop.set("kind", drop.args.get("kind", "COLUMN")) 8604 return drop 8605 8606 def _parse_alter_drop_action(self) -> exp.Expr | None: 8607 return self._parse_drop_column() 8608 8609 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 8610 def _parse_drop_partition(self, exists: bool | None = None) -> exp.DropPartition: 8611 return self.expression( 8612 exp.DropPartition(expressions=self._parse_csv(self._parse_partition), exists=exists) 8613 ) 8614 8615 def _parse_alter_table_add(self) -> list[exp.Expr]: 8616 def _parse_add_alteration() -> exp.Expr | None: 8617 self._match_text_seq("ADD") 8618 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 8619 return self.expression( 8620 exp.AddConstraint(expressions=self._parse_csv(self._parse_constraint)) 8621 ) 8622 8623 
def _parse_alter_table_alter(self) -> exp.Expr | None:
    """
    Parse the action that follows ALTER inside an ALTER statement.

    If the next keyword has an entry in `ALTER_ALTER_PARSERS`, that parser handles the
    rest. Otherwise the action is treated as `ALTER [COLUMN] <column> ...` and one of
    the column alterations below is parsed. The checks run in order and the first one
    that matches decides the result; when none matches, the remainder is parsed as a
    `[SET DATA] [TYPE] <type> [COLLATE ...] [USING ...]` type change.

    Returns:
        The result of the registered parser, or an AlterColumn node.
    """
    if self._match_texts(self.ALTER_ALTER_PARSERS):
        return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

    # Many dialects support the ALTER [COLUMN] syntax, so if there is no
    # keyword after ALTER we default to parsing this statement
    self._match(TokenType.COLUMN)
    column = self._parse_field(any_token=True)

    if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn(this=column, drop=True))
    if self._match_pair(TokenType.SET, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn(this=column, default=self._parse_disjunction()))
    if self._match(TokenType.COMMENT):
        return self.expression(exp.AlterColumn(this=column, comment=self._parse_string()))
    if self._match_text_seq("DROP", "NOT", "NULL"):
        return self.expression(exp.AlterColumn(this=column, drop=True, allow_null=True))
    if self._match_text_seq("SET", "NOT", "NULL"):
        return self.expression(exp.AlterColumn(this=column, allow_null=False))

    if self._match_text_seq("SET", "VISIBLE"):
        return self.expression(exp.AlterColumn(this=column, visible="VISIBLE"))
    if self._match_text_seq("SET", "INVISIBLE"):
        return self.expression(exp.AlterColumn(this=column, visible="INVISIBLE"))

    # Both keyword groups are optional; the match results are intentionally ignored.
    self._match_text_seq("SET", "DATA")
    self._match_text_seq("TYPE")
    return self.expression(
        exp.AlterColumn(
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_disjunction(),
        )
    )
advance=False) 8724 ): 8725 exists = self._parse_exists() 8726 old_column = self._parse_column() 8727 to = self._match_text_seq("TO") 8728 new_column = self._parse_column() 8729 8730 if old_column is None or not to or new_column is None: 8731 return None 8732 8733 return self.expression(exp.RenameColumn(this=old_column, to=new_column, exists=exists)) 8734 8735 self._match_text_seq("TO") 8736 return self.expression(exp.AlterRename(this=self._parse_table(schema=True))) 8737 8738 def _parse_alter_table_set(self) -> exp.AlterSet: 8739 alter_set = self.expression(exp.AlterSet()) 8740 8741 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 8742 "TABLE", "PROPERTIES" 8743 ): 8744 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 8745 elif self._match_text_seq("FILESTREAM_ON", advance=False): 8746 alter_set.set("expressions", [self._parse_assignment()]) 8747 elif self._match_texts(("LOGGED", "UNLOGGED")): 8748 alter_set.set("option", exp.var(self._prev.text.upper())) 8749 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 8750 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 8751 elif self._match_text_seq("LOCATION"): 8752 alter_set.set("location", self._parse_field()) 8753 elif self._match_text_seq("ACCESS", "METHOD"): 8754 alter_set.set("access_method", self._parse_field()) 8755 elif self._match_text_seq("TABLESPACE"): 8756 alter_set.set("tablespace", self._parse_field()) 8757 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 8758 alter_set.set("file_format", [self._parse_field()]) 8759 elif self._match_text_seq("STAGE_FILE_FORMAT"): 8760 alter_set.set("file_format", self._parse_wrapped_options()) 8761 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 8762 alter_set.set("copy_options", self._parse_wrapped_options()) 8763 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 8764 alter_set.set("tag", 
self._parse_csv(self._parse_assignment)) 8765 else: 8766 if self._match_text_seq("SERDE"): 8767 alter_set.set("serde", self._parse_field()) 8768 8769 properties = self._parse_wrapped(self._parse_properties, optional=True) 8770 alter_set.set("expressions", [properties]) 8771 8772 return alter_set 8773 8774 def _parse_alter_session(self) -> exp.AlterSession: 8775 """Parse ALTER SESSION SET/UNSET statements.""" 8776 if self._match(TokenType.SET): 8777 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 8778 return self.expression(exp.AlterSession(expressions=expressions, unset=False)) 8779 8780 self._match_text_seq("UNSET") 8781 expressions = self._parse_csv( 8782 lambda: self.expression(exp.SetItem(this=self._parse_id_var(any_token=True))) 8783 ) 8784 return self.expression(exp.AlterSession(expressions=expressions, unset=True)) 8785 8786 def _parse_alter(self) -> exp.Alter | exp.Command: 8787 start = self._prev 8788 8789 iceberg = self._match_text_seq("ICEBERG") 8790 8791 alter_token = self._match_set(self.ALTERABLES) and self._prev 8792 if not alter_token: 8793 return self._parse_as_command(start) 8794 if iceberg and alter_token.token_type != TokenType.TABLE: 8795 return self._parse_as_command(start) 8796 8797 exists = self._parse_exists() 8798 only = self._match_text_seq("ONLY") 8799 8800 if alter_token.token_type == TokenType.SESSION: 8801 this = None 8802 check = None 8803 cluster = None 8804 else: 8805 this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS) 8806 check = self._match_text_seq("WITH", "CHECK") 8807 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8808 8809 if self._next: 8810 self._advance() 8811 8812 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 8813 if parser: 8814 actions = ensure_list(parser(self)) 8815 not_valid = self._match_text_seq("NOT", "VALID") 8816 options = self._parse_csv(self._parse_property) 8817 cascade = 
self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE") 8818 8819 if not self._curr and actions: 8820 return self.expression( 8821 exp.Alter( 8822 this=this, 8823 kind=alter_token.text.upper(), 8824 exists=exists, 8825 actions=actions, 8826 only=only, 8827 options=options, 8828 cluster=cluster, 8829 not_valid=not_valid, 8830 check=check, 8831 cascade=cascade, 8832 iceberg=iceberg, 8833 ) 8834 ) 8835 8836 return self._parse_as_command(start) 8837 8838 def _parse_analyze(self) -> exp.Analyze | exp.Command: 8839 start = self._prev 8840 # https://duckdb.org/docs/sql/statements/analyze 8841 if not self._curr: 8842 return self.expression(exp.Analyze()) 8843 8844 options = [] 8845 while self._match_texts(self.ANALYZE_STYLES): 8846 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 8847 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 8848 else: 8849 options.append(self._prev.text.upper()) 8850 8851 this: exp.Expr | None = None 8852 inner_expression: exp.Expr | None = None 8853 8854 kind = self._curr.text.upper() if self._curr else None 8855 8856 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 8857 this = self._parse_table_parts() 8858 elif self._match_text_seq("TABLES"): 8859 if self._match_set((TokenType.FROM, TokenType.IN)): 8860 kind = f"{kind} {self._prev.text.upper()}" 8861 this = self._parse_table(schema=True, is_db_reference=True) 8862 elif self._match_text_seq("DATABASE"): 8863 this = self._parse_table(schema=True, is_db_reference=True) 8864 elif self._match_text_seq("CLUSTER"): 8865 this = self._parse_table() 8866 # Try matching inner expr keywords before fallback to parse table. 
8867 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 8868 kind = None 8869 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 8870 else: 8871 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 8872 kind = None 8873 this = self._parse_table_parts() 8874 8875 partition = self._try_parse(self._parse_partition) 8876 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 8877 return self._parse_as_command(start) 8878 8879 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 8880 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 8881 "WITH", "ASYNC", "MODE" 8882 ): 8883 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 8884 else: 8885 mode = None 8886 8887 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 8888 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 8889 8890 properties = self._parse_properties() 8891 return self.expression( 8892 exp.Analyze( 8893 kind=kind, 8894 this=this, 8895 mode=mode, 8896 partition=partition, 8897 properties=properties, 8898 expression=inner_expression, 8899 options=options, 8900 ) 8901 ) 8902 8903 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 8904 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 8905 this = None 8906 kind = self._prev.text.upper() 8907 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 8908 expressions = [] 8909 8910 if not self._match_text_seq("STATISTICS"): 8911 self.raise_error("Expecting token STATISTICS") 8912 8913 if self._match_text_seq("NOSCAN"): 8914 this = "NOSCAN" 8915 elif self._match(TokenType.FOR): 8916 if self._match_text_seq("ALL", "COLUMNS"): 8917 this = "FOR ALL COLUMNS" 8918 if self._match_texts("COLUMNS"): 8919 this = "FOR COLUMNS" 8920 expressions = self._parse_csv(self._parse_column_reference) 8921 elif self._match_text_seq("SAMPLE"): 8922 sample = 
self._parse_number() 8923 expressions = [ 8924 self.expression( 8925 exp.AnalyzeSample( 8926 sample=sample, 8927 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 8928 ) 8929 ) 8930 ] 8931 8932 return self.expression( 8933 exp.AnalyzeStatistics(kind=kind, option=option, this=this, expressions=expressions) 8934 ) 8935 8936 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 8937 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 8938 kind = None 8939 this = None 8940 expression: exp.Expr | None = None 8941 if self._match_text_seq("REF", "UPDATE"): 8942 kind = "REF" 8943 this = "UPDATE" 8944 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 8945 this = "UPDATE SET DANGLING TO NULL" 8946 elif self._match_text_seq("STRUCTURE"): 8947 kind = "STRUCTURE" 8948 if self._match_text_seq("CASCADE", "FAST"): 8949 this = "CASCADE FAST" 8950 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 8951 ("ONLINE", "OFFLINE") 8952 ): 8953 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 8954 expression = self._parse_into() 8955 8956 return self.expression(exp.AnalyzeValidate(kind=kind, this=this, expression=expression)) 8957 8958 def _parse_analyze_columns(self) -> exp.AnalyzeColumns | None: 8959 this = self._prev.text.upper() 8960 if self._match_text_seq("COLUMNS"): 8961 return self.expression(exp.AnalyzeColumns(this=f"{this} {self._prev.text.upper()}")) 8962 return None 8963 8964 def _parse_analyze_delete(self) -> exp.AnalyzeDelete | None: 8965 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 8966 if self._match_text_seq("STATISTICS"): 8967 return self.expression(exp.AnalyzeDelete(kind=kind)) 8968 return None 8969 8970 def _parse_analyze_list(self) -> exp.AnalyzeListChainedRows | None: 8971 if self._match_text_seq("CHAINED", "ROWS"): 8972 return self.expression(exp.AnalyzeListChainedRows(expression=self._parse_into())) 8973 return None 8974 8975 # 
https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 8976 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 8977 this = self._prev.text.upper() 8978 expression: exp.Expr | None = None 8979 expressions = [] 8980 update_options = None 8981 8982 if self._match_text_seq("HISTOGRAM", "ON"): 8983 expressions = self._parse_csv(self._parse_column_reference) 8984 with_expressions = [] 8985 while self._match(TokenType.WITH): 8986 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 8987 if self._match_texts(("SYNC", "ASYNC")): 8988 if self._match_text_seq("MODE", advance=False): 8989 with_expressions.append(f"{self._prev.text.upper()} MODE") 8990 self._advance() 8991 else: 8992 buckets = self._parse_number() 8993 if self._match_text_seq("BUCKETS"): 8994 with_expressions.append(f"{buckets} BUCKETS") 8995 if with_expressions: 8996 expression = self.expression(exp.AnalyzeWith(expressions=with_expressions)) 8997 8998 if self._match_texts(("MANUAL", "AUTO")) and self._match( 8999 TokenType.UPDATE, advance=False 9000 ): 9001 update_options = self._prev.text.upper() 9002 self._advance() 9003 elif self._match_text_seq("USING", "DATA"): 9004 expression = self.expression(exp.UsingData(this=self._parse_string())) 9005 9006 return self.expression( 9007 exp.AnalyzeHistogram( 9008 this=this, 9009 expressions=expressions, 9010 expression=expression, 9011 update_options=update_options, 9012 ) 9013 ) 9014 9015 def _parse_merge(self) -> exp.Merge: 9016 self._match(TokenType.INTO) 9017 target = self._parse_table() 9018 9019 if target and self._match(TokenType.ALIAS, advance=False): 9020 target.set("alias", self._parse_table_alias()) 9021 9022 self._match(TokenType.USING) 9023 using = self._parse_table() 9024 9025 return self.expression( 9026 exp.Merge( 9027 this=target, 9028 using=using, 9029 on=self._match(TokenType.ON) and self._parse_disjunction(), 9030 using_cond=self._match(TokenType.USING) and self._parse_using_identifiers(), 9031 
whens=self._parse_when_matched(), 9032 returning=self._parse_returning(), 9033 ) 9034 ) 9035 9036 def _parse_when_matched(self) -> exp.Whens: 9037 whens = [] 9038 9039 while self._match(TokenType.WHEN): 9040 matched = not self._match(TokenType.NOT) 9041 self._match_text_seq("MATCHED") 9042 source = ( 9043 False 9044 if self._match_text_seq("BY", "TARGET") 9045 else self._match_text_seq("BY", "SOURCE") 9046 ) 9047 condition = self._parse_disjunction() if self._match(TokenType.AND) else None 9048 9049 self._match(TokenType.THEN) 9050 9051 if self._match(TokenType.INSERT): 9052 this = self._parse_star() 9053 if this: 9054 then: exp.Expr | None = self.expression(exp.Insert(this=this)) 9055 else: 9056 then = self.expression( 9057 exp.Insert( 9058 this=exp.var("ROW") 9059 if self._match_text_seq("ROW") 9060 else self._parse_value(values=False), 9061 expression=self._match_text_seq("VALUES") and self._parse_value(), 9062 where=self._parse_where(), 9063 ) 9064 ) 9065 elif self._match(TokenType.UPDATE): 9066 expressions = self._parse_star() 9067 if expressions: 9068 then = self.expression(exp.Update(expressions=expressions)) 9069 else: 9070 then = self.expression( 9071 exp.Update( 9072 expressions=self._match(TokenType.SET) 9073 and self._parse_csv(self._parse_equality), 9074 where=self._parse_where(), 9075 ) 9076 ) 9077 elif self._match(TokenType.DELETE): 9078 then = self.expression(exp.Var(this=self._prev.text)) 9079 else: 9080 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 9081 9082 whens.append( 9083 self.expression( 9084 exp.When(matched=matched, source=source, condition=condition, then=then) 9085 ) 9086 ) 9087 return self.expression(exp.Whens(expressions=whens)) 9088 9089 def _parse_show(self) -> exp.Expr | None: 9090 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 9091 if parser: 9092 return parser(self) 9093 return self._parse_as_command(self._prev) 9094 9095 def _parse_set_item_assignment(self, kind: str | None = None) -> exp.Expr | None: 
9096 index = self._index 9097 9098 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 9099 return self._parse_set_transaction(global_=kind == "GLOBAL") 9100 9101 left = self._parse_primary() or self._parse_column() 9102 assignment_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS) 9103 9104 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 9105 self._retreat(index) 9106 return None 9107 9108 right = self._parse_statement() or self._parse_id_var() 9109 if isinstance(right, (exp.Column, exp.Identifier)): 9110 right = exp.var(right.name) 9111 9112 this = self.expression(exp.EQ(this=left, expression=right)) 9113 return self.expression(exp.SetItem(this=this, kind=kind)) 9114 9115 def _parse_set_transaction(self, global_: bool = False) -> exp.Expr: 9116 self._match_text_seq("TRANSACTION") 9117 characteristics = self._parse_csv( 9118 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 9119 ) 9120 return self.expression( 9121 exp.SetItem(expressions=characteristics, kind="TRANSACTION", global_=global_) 9122 ) 9123 9124 def _parse_set_item(self) -> exp.Expr | None: 9125 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 9126 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 9127 9128 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 9129 index = self._index 9130 set_ = self.expression( 9131 exp.Set(expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag) 9132 ) 9133 9134 if self._curr: 9135 self._retreat(index) 9136 return self._parse_as_command(self._prev) 9137 9138 return set_ 9139 9140 def _parse_var_from_options( 9141 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 9142 ) -> exp.Var | None: 9143 start = self._curr 9144 if not start: 9145 return None 9146 9147 option = start.text.upper() 9148 continuations = options.get(option) 9149 9150 index = self._index 9151 self._advance() 9152 
for keywords in continuations or []: 9153 if isinstance(keywords, str): 9154 keywords = (keywords,) 9155 9156 if self._match_text_seq(*keywords): 9157 option = f"{option} {' '.join(keywords)}" 9158 break 9159 else: 9160 if continuations or continuations is None: 9161 if raise_unmatched: 9162 self.raise_error(f"Unknown option {option}") 9163 9164 self._retreat(index) 9165 return None 9166 9167 return exp.var(option) 9168 9169 def _parse_as_command(self, start: Token) -> exp.Command: 9170 while self._curr: 9171 self._advance() 9172 text = self._find_sql(start, self._prev) 9173 size = len(start.text) 9174 self._warn_unsupported() 9175 return exp.Command(this=text[:size], expression=text[size:]) 9176 9177 def _parse_dict_property(self, this: str) -> exp.DictProperty: 9178 settings = [] 9179 9180 self._match_l_paren() 9181 kind = self._parse_id_var() 9182 9183 if self._match(TokenType.L_PAREN): 9184 while True: 9185 key = self._parse_id_var() 9186 value = self._parse_function() or self._parse_primary_or_var() 9187 if not key and value is None: 9188 break 9189 settings.append(self.expression(exp.DictSubProperty(this=key, value=value))) 9190 self._match(TokenType.R_PAREN) 9191 9192 self._match_r_paren() 9193 9194 return self.expression( 9195 exp.DictProperty(this=this, kind=kind.this if kind else None, settings=settings) 9196 ) 9197 9198 def _parse_dict_range(self, this: str) -> exp.DictRange: 9199 self._match_l_paren() 9200 has_min = self._match_text_seq("MIN") 9201 if has_min: 9202 min = self._parse_var() or self._parse_primary() 9203 self._match_text_seq("MAX") 9204 max = self._parse_var() or self._parse_primary() 9205 else: 9206 max = self._parse_var() or self._parse_primary() 9207 min = exp.Literal.number(0) 9208 self._match_r_paren() 9209 return self.expression(exp.DictRange(this=this, min=min, max=max)) 9210 9211 def _parse_comprehension(self, this: exp.Expr | None) -> exp.Comprehension | None: 9212 index = self._index 9213 expression = self._parse_column() 9214 
position = self._match(TokenType.COMMA) and self._parse_column() 9215 9216 if not self._match(TokenType.IN): 9217 self._retreat(index - 1) 9218 return None 9219 iterator = self._parse_column() 9220 condition = self._parse_disjunction() if self._match_text_seq("IF") else None 9221 return self.expression( 9222 exp.Comprehension( 9223 this=this, 9224 expression=expression, 9225 position=position, 9226 iterator=iterator, 9227 condition=condition, 9228 ) 9229 ) 9230 9231 def _parse_heredoc(self) -> exp.Heredoc | None: 9232 if self._match(TokenType.HEREDOC_STRING): 9233 return self.expression(exp.Heredoc(this=self._prev.text)) 9234 9235 if not self._match_text_seq("$"): 9236 return None 9237 9238 tags = ["$"] 9239 tag_text = None 9240 9241 if self._is_connected(): 9242 self._advance() 9243 tags.append(self._prev.text.upper()) 9244 else: 9245 self.raise_error("No closing $ found") 9246 9247 if tags[-1] != "$": 9248 if self._is_connected() and self._match_text_seq("$"): 9249 tag_text = tags[-1] 9250 tags.append("$") 9251 else: 9252 self.raise_error("No closing $ found") 9253 9254 heredoc_start = self._curr 9255 9256 while self._curr: 9257 if self._match_text_seq(*tags, advance=False): 9258 this = self._find_sql(heredoc_start, self._prev) 9259 self._advance(len(tags)) 9260 return self.expression(exp.Heredoc(this=this, tag=tag_text)) 9261 9262 self._advance() 9263 9264 self.raise_error(f"No closing {''.join(tags)} found") 9265 return None 9266 9267 def _find_parser(self, parsers: dict[str, t.Callable], trie: dict) -> t.Callable | None: 9268 if not self._curr: 9269 return None 9270 9271 index = self._index 9272 this = [] 9273 while True: 9274 # The current token might be multiple words 9275 curr = self._curr.text.upper() 9276 key = curr.split(" ") 9277 this.append(curr) 9278 9279 self._advance() 9280 result, trie = in_trie(trie, key) 9281 if result == TrieResult.FAILED: 9282 break 9283 9284 if result == TrieResult.EXISTS: 9285 subparser = parsers[" ".join(this)] 9286 return 
subparser 9287 9288 self._retreat(index) 9289 return None 9290 9291 def _match_l_paren(self, expression: exp.Expr | None = None) -> None: 9292 if not self._match(TokenType.L_PAREN, expression=expression): 9293 self.raise_error("Expecting (") 9294 9295 def _match_r_paren(self, expression: exp.Expr | None = None) -> None: 9296 if not self._match(TokenType.R_PAREN, expression=expression): 9297 self.raise_error("Expecting )") 9298 9299 def _replace_lambda( 9300 self, node: exp.Expr | None, expressions: list[exp.Expr] 9301 ) -> exp.Expr | None: 9302 if not node: 9303 return node 9304 9305 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 9306 9307 for column in node.find_all(exp.Column): 9308 typ = lambda_types.get(column.parts[0].name) 9309 if typ is not None: 9310 dot_or_id = column.to_dot() if column.table else column.this 9311 9312 if typ: 9313 dot_or_id = self.expression(exp.Cast(this=dot_or_id, to=typ)) 9314 9315 parent = column.parent 9316 9317 while isinstance(parent, exp.Dot): 9318 if not isinstance(parent.parent, exp.Dot): 9319 parent.replace(dot_or_id) 9320 break 9321 parent = parent.parent 9322 else: 9323 if column is node: 9324 node = dot_or_id 9325 else: 9326 column.replace(dot_or_id) 9327 return node 9328 9329 def _parse_truncate_table(self) -> exp.TruncateTable | None | exp.Expr: 9330 start = self._prev 9331 9332 # Not to be confused with TRUNCATE(number, decimals) function call 9333 if self._match(TokenType.L_PAREN): 9334 self._retreat(self._index - 2) 9335 return self._parse_function() 9336 9337 # Clickhouse supports TRUNCATE DATABASE as well 9338 is_database = self._match(TokenType.DATABASE) 9339 9340 self._match(TokenType.TABLE) 9341 9342 exists = self._parse_exists(not_=False) 9343 9344 expressions = self._parse_csv( 9345 lambda: self._parse_table(schema=True, is_db_reference=is_database) 9346 ) 9347 9348 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 9349 9350 if self._match_text_seq("RESTART", 
"IDENTITY"): 9351 identity = "RESTART" 9352 elif self._match_text_seq("CONTINUE", "IDENTITY"): 9353 identity = "CONTINUE" 9354 else: 9355 identity = None 9356 9357 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 9358 option = self._prev.text 9359 else: 9360 option = None 9361 9362 partition = self._parse_partition() 9363 9364 # Fallback case 9365 if self._curr: 9366 return self._parse_as_command(start) 9367 9368 return self.expression( 9369 exp.TruncateTable( 9370 expressions=expressions, 9371 is_database=is_database, 9372 exists=exists, 9373 cluster=cluster, 9374 identity=identity, 9375 option=option, 9376 partition=partition, 9377 ) 9378 ) 9379 9380 def _parse_with_operator(self) -> exp.Expr | None: 9381 this = self._parse_ordered(self._parse_opclass) 9382 9383 if not self._match(TokenType.WITH): 9384 return this 9385 9386 op = self._parse_var(any_token=True, tokens=self.RESERVED_TOKENS) 9387 9388 return self.expression(exp.WithOperator(this=this, op=op)) 9389 9390 def _parse_wrapped_options(self) -> list[exp.Expr]: 9391 self._match(TokenType.EQ) 9392 self._match(TokenType.L_PAREN) 9393 9394 opts: list[exp.Expr] = [] 9395 option: exp.Expr | list[exp.Expr] | None 9396 while self._curr and not self._match(TokenType.R_PAREN): 9397 if self._match_text_seq("FORMAT_NAME", "="): 9398 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 9399 option = self._parse_format_name() 9400 else: 9401 option = self._parse_property() 9402 9403 if option is None: 9404 self.raise_error("Unable to parse option") 9405 break 9406 9407 opts.extend(ensure_list(option)) 9408 9409 return opts 9410 9411 def _parse_copy_parameters(self) -> list[exp.CopyParameter]: 9412 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 9413 9414 options = [] 9415 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 9416 option = self._parse_var(any_token=True) 9417 prev = self._prev.text.upper() 9418 9419 # Different dialects might 
separate options and values by white space, "=" and "AS" 9420 self._match(TokenType.EQ) 9421 self._match(TokenType.ALIAS) 9422 9423 param = self.expression(exp.CopyParameter(this=option)) 9424 9425 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 9426 TokenType.L_PAREN, advance=False 9427 ): 9428 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 9429 param.set("expressions", self._parse_wrapped_options()) 9430 elif prev == "FILE_FORMAT": 9431 # T-SQL's external file format case 9432 param.set("expression", self._parse_field()) 9433 elif ( 9434 prev == "FORMAT" 9435 and self._prev.token_type == TokenType.ALIAS 9436 and self._match_texts(("AVRO", "JSON")) 9437 ): 9438 param.set("this", exp.var(f"FORMAT AS {self._prev.text.upper()}")) 9439 param.set("expression", self._parse_field()) 9440 else: 9441 param.set("expression", self._parse_unquoted_field() or self._parse_bracket()) 9442 9443 options.append(param) 9444 9445 if sep: 9446 self._match(sep) 9447 9448 return options 9449 9450 def _parse_credentials(self) -> exp.Credentials | None: 9451 expr = self.expression(exp.Credentials()) 9452 9453 if self._match_text_seq("STORAGE_INTEGRATION", "="): 9454 expr.set("storage", self._parse_field()) 9455 if self._match_text_seq("CREDENTIALS"): 9456 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 9457 creds = ( 9458 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 9459 ) 9460 expr.set("credentials", creds) 9461 if self._match_text_seq("ENCRYPTION"): 9462 expr.set("encryption", self._parse_wrapped_options()) 9463 if self._match_text_seq("IAM_ROLE"): 9464 expr.set( 9465 "iam_role", 9466 exp.var(self._prev.text) if self._match(TokenType.DEFAULT) else self._parse_field(), 9467 ) 9468 if self._match_text_seq("REGION"): 9469 expr.set("region", self._parse_field()) 9470 9471 return expr 9472 9473 def _parse_file_location(self) -> exp.Expr | None: 9474 return self._parse_field() 9475 9476 def 
_parse_copy(self) -> exp.Copy | exp.Command: 9477 start = self._prev 9478 9479 self._match(TokenType.INTO) 9480 9481 this = ( 9482 self._parse_select(nested=True, parse_subquery_alias=False) 9483 if self._match(TokenType.L_PAREN, advance=False) 9484 else self._parse_table(schema=True) 9485 ) 9486 9487 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 9488 9489 files = self._parse_csv(self._parse_file_location) 9490 if self._match(TokenType.EQ, advance=False): 9491 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 9492 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 9493 # list via `_parse_wrapped(..)` below. 9494 self._advance(-1) 9495 files = [] 9496 9497 credentials = self._parse_credentials() 9498 9499 self._match_text_seq("WITH") 9500 9501 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 9502 9503 # Fallback case 9504 if self._curr: 9505 return self._parse_as_command(start) 9506 9507 return self.expression( 9508 exp.Copy(this=this, kind=kind, credentials=credentials, files=files, params=params) 9509 ) 9510 9511 def _parse_normalize(self) -> exp.Normalize: 9512 return self.expression( 9513 exp.Normalize( 9514 this=self._parse_bitwise(), form=self._match(TokenType.COMMA) and self._parse_var() 9515 ) 9516 ) 9517 9518 def _parse_ceil_floor(self, expr_type: type[TCeilFloor]) -> TCeilFloor: 9519 args = self._parse_csv(lambda: self._parse_lambda()) 9520 9521 this = seq_get(args, 0) 9522 decimals = seq_get(args, 1) 9523 9524 return expr_type( 9525 this=this, 9526 decimals=decimals, 9527 to=self._parse_var() if self._match_text_seq("TO") else None, 9528 ) 9529 9530 def _parse_star_ops(self) -> exp.Expr | None: 9531 star_token = self._prev 9532 9533 if self._match_text_seq("COLUMNS", "(", advance=False): 9534 this = self._parse_function() 9535 if isinstance(this, exp.Columns): 9536 this.set("unpack", True) 9537 return this 9538 9539 return self.expression( 9540 
exp.Star( 9541 except_=self._parse_star_op("EXCEPT", "EXCLUDE"), 9542 replace=self._parse_star_op("REPLACE"), 9543 rename=self._parse_star_op("RENAME"), 9544 ) 9545 ).update_positions(star_token) 9546 9547 def _parse_grant_privilege(self) -> exp.GrantPrivilege | None: 9548 privilege_parts = [] 9549 9550 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 9551 # (end of privilege list) or L_PAREN (start of column list) are met 9552 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 9553 privilege_parts.append(self._curr.text.upper()) 9554 self._advance() 9555 9556 this = exp.var(" ".join(privilege_parts)) 9557 expressions = ( 9558 self._parse_wrapped_csv(self._parse_column) 9559 if self._match(TokenType.L_PAREN, advance=False) 9560 else None 9561 ) 9562 9563 return self.expression(exp.GrantPrivilege(this=this, expressions=expressions)) 9564 9565 def _parse_grant_principal(self) -> exp.GrantPrincipal | None: 9566 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 9567 principal = self._parse_id_var() 9568 9569 if not principal: 9570 return None 9571 9572 return self.expression(exp.GrantPrincipal(this=principal, kind=kind)) 9573 9574 def _parse_grant_revoke_common( 9575 self, 9576 ) -> tuple[list | None, str | None, exp.Expr | None]: 9577 privileges = self._parse_csv(self._parse_grant_privilege) 9578 9579 self._match(TokenType.ON) 9580 kind = self._prev.text.upper() if self._match_set(self.CREATABLES) else None 9581 9582 # Attempt to parse the securable e.g. 
MySQL allows names 9583 # such as "foo.*", "*.*" which are not easily parseable yet 9584 securable = self._try_parse(self._parse_table_parts) 9585 9586 return privileges, kind, securable 9587 9588 def _parse_grant(self) -> exp.Grant | exp.Command: 9589 start = self._prev 9590 9591 privileges, kind, securable = self._parse_grant_revoke_common() 9592 9593 if not securable or not self._match_text_seq("TO"): 9594 return self._parse_as_command(start) 9595 9596 principals = self._parse_csv(self._parse_grant_principal) 9597 9598 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 9599 9600 if self._curr: 9601 return self._parse_as_command(start) 9602 9603 return self.expression( 9604 exp.Grant( 9605 privileges=privileges, 9606 kind=kind, 9607 securable=securable, 9608 principals=principals, 9609 grant_option=grant_option, 9610 ) 9611 ) 9612 9613 def _parse_revoke(self) -> exp.Revoke | exp.Command: 9614 start = self._prev 9615 9616 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 9617 9618 privileges, kind, securable = self._parse_grant_revoke_common() 9619 9620 if not securable or not self._match_text_seq("FROM"): 9621 return self._parse_as_command(start) 9622 9623 principals = self._parse_csv(self._parse_grant_principal) 9624 9625 cascade = None 9626 if self._match_texts(("CASCADE", "RESTRICT")): 9627 cascade = self._prev.text.upper() 9628 9629 if self._curr: 9630 return self._parse_as_command(start) 9631 9632 return self.expression( 9633 exp.Revoke( 9634 privileges=privileges, 9635 kind=kind, 9636 securable=securable, 9637 principals=principals, 9638 grant_option=grant_option, 9639 cascade=cascade, 9640 ) 9641 ) 9642 9643 def _parse_overlay(self) -> exp.Overlay: 9644 def _parse_overlay_arg(text: str) -> exp.Expr | None: 9645 return ( 9646 self._parse_bitwise() 9647 if self._match(TokenType.COMMA) or self._match_text_seq(text) 9648 else None 9649 ) 9650 9651 return self.expression( 9652 exp.Overlay( 9653 this=self._parse_bitwise(), 9654 
expression=_parse_overlay_arg("PLACING"), 9655 from_=_parse_overlay_arg("FROM"), 9656 for_=_parse_overlay_arg("FOR"), 9657 ) 9658 ) 9659 9660 def _parse_format_name(self) -> exp.Property: 9661 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 9662 # for FILE_FORMAT = <format_name> 9663 return self.expression( 9664 exp.Property( 9665 this=exp.var("FORMAT_NAME"), value=self._parse_string() or self._parse_table_parts() 9666 ) 9667 ) 9668 9669 def _parse_max_min_by(self, expr_type: type[exp.AggFunc]) -> exp.AggFunc: 9670 args: list[exp.Expr] = [] 9671 9672 if self._match(TokenType.DISTINCT): 9673 args.append(self.expression(exp.Distinct(expressions=[self._parse_lambda()]))) 9674 self._match(TokenType.COMMA) 9675 9676 args.extend(self._parse_function_args()) 9677 9678 return self.expression( 9679 expr_type(this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)) 9680 ) 9681 9682 def _identifier_expression( 9683 self, token: Token | None = None, quoted: bool | None = None 9684 ) -> exp.Identifier: 9685 token = token or self._prev 9686 return self.expression(exp.Identifier(this=token.text, quoted=quoted), token) 9687 9688 def _build_pipe_cte( 9689 self, 9690 query: exp.Query, 9691 expressions: list[exp.Expr], 9692 alias_cte: exp.TableAlias | None = None, 9693 ) -> exp.Select: 9694 new_cte: str | exp.TableAlias | None 9695 if alias_cte: 9696 new_cte = alias_cte 9697 else: 9698 self._pipe_cte_counter += 1 9699 new_cte = f"__tmp{self._pipe_cte_counter}" 9700 9701 with_ = query.args.get("with_") 9702 ctes = with_.pop() if with_ else None 9703 9704 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 9705 if ctes: 9706 new_select.set("with_", ctes) 9707 9708 return new_select.with_(new_cte, as_=query, copy=False) 9709 9710 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 9711 select = self._parse_select(consume_pipe=False) 9712 if not select: 9713 return query 9714 9715 return 
self._build_pipe_cte( 9716 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 9717 ) 9718 9719 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 9720 limit = self._parse_limit() 9721 offset = self._parse_offset() 9722 if limit: 9723 curr_limit = query.args.get("limit", limit) 9724 if curr_limit.expression.to_py() >= limit.expression.to_py(): 9725 query.limit(limit, copy=False) 9726 if offset: 9727 curr_offset = query.args.get("offset") 9728 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 9729 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 9730 9731 return query 9732 9733 def _parse_pipe_syntax_aggregate_fields(self) -> exp.Expr | None: 9734 this = self._parse_disjunction() 9735 if self._match_text_seq("GROUP", "AND", advance=False): 9736 return this 9737 9738 this = self._parse_alias(this) 9739 9740 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 9741 return self._parse_ordered(lambda: this) 9742 9743 return this 9744 9745 def _parse_pipe_syntax_aggregate_group_order_by( 9746 self, query: exp.Select, group_by_exists: bool = True 9747 ) -> exp.Select: 9748 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 9749 aggregates_or_groups, orders = [], [] 9750 for element in expr: 9751 if isinstance(element, exp.Ordered): 9752 this = element.this 9753 if isinstance(this, exp.Alias): 9754 element.set("this", this.args["alias"]) 9755 orders.append(element) 9756 else: 9757 this = element 9758 aggregates_or_groups.append(this) 9759 9760 if group_by_exists: 9761 query.select(*aggregates_or_groups, copy=False).group_by( 9762 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 9763 copy=False, 9764 ) 9765 else: 9766 query.select(*aggregates_or_groups, append=False, copy=False) 9767 9768 if orders: 9769 return query.order_by(*orders, append=False, copy=False) 9770 9771 return query 9772 9773 def 
_parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 9774 self._match_text_seq("AGGREGATE") 9775 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 9776 9777 if self._match(TokenType.GROUP_BY) or ( 9778 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 9779 ): 9780 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 9781 9782 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9783 9784 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> exp.Query | None: 9785 first_setop = self.parse_set_operation(this=query) 9786 if not first_setop: 9787 return None 9788 9789 def _parse_and_unwrap_query() -> exp.Expr | None: 9790 expr = self._parse_paren() 9791 return expr.assert_is(exp.Subquery).unnest() if expr else None 9792 9793 first_setop.this.pop() 9794 9795 setops = [ 9796 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 9797 *self._parse_csv(_parse_and_unwrap_query), 9798 ] 9799 9800 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9801 with_ = query.args.get("with_") 9802 ctes = with_.pop() if with_ else None 9803 9804 if isinstance(first_setop, exp.Union): 9805 query = query.union(*setops, copy=False, **first_setop.args) 9806 elif isinstance(first_setop, exp.Except): 9807 query = query.except_(*setops, copy=False, **first_setop.args) 9808 else: 9809 query = query.intersect(*setops, copy=False, **first_setop.args) 9810 9811 query.set("with_", ctes) 9812 9813 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9814 9815 def _parse_pipe_syntax_join(self, query: exp.Query) -> exp.Query | None: 9816 join = self._parse_join() 9817 if not join: 9818 return None 9819 9820 if isinstance(query, exp.Select): 9821 return query.join(join, copy=False) 9822 9823 return query 9824 9825 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 9826 pivots = self._parse_pivots() 9827 if not pivots: 9828 return query 
9829 9830 from_ = query.args.get("from_") 9831 if from_: 9832 from_.this.set("pivots", pivots) 9833 9834 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9835 9836 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 9837 self._match_text_seq("EXTEND") 9838 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 9839 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9840 9841 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 9842 sample = self._parse_table_sample() 9843 9844 with_ = query.args.get("with_") 9845 if with_: 9846 with_.expressions[-1].this.set("sample", sample) 9847 else: 9848 query.set("sample", sample) 9849 9850 return query 9851 9852 def _parse_pipe_syntax_query(self, query: exp.Query) -> exp.Query | None: 9853 if isinstance(query, exp.Subquery): 9854 query = exp.select("*").from_(query, copy=False) 9855 9856 if not query.args.get("from_"): 9857 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 9858 9859 while self._match(TokenType.PIPE_GT): 9860 start_index = self._index 9861 start_text = self._curr.text.upper() 9862 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(start_text) 9863 if not parser: 9864 # The set operators (UNION, etc) and the JOIN operator have a few common starting 9865 # keywords, making it tricky to disambiguate them without lookahead. The approach 9866 # here is to try and parse a set operation and if that fails, then try to parse a 9867 # join operator. If that fails as well, then the operator is not supported. 
9868 parsed_query = self._parse_pipe_syntax_set_operator(query) 9869 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 9870 if not parsed_query: 9871 self._retreat(start_index) 9872 self.raise_error(f"Unsupported pipe syntax operator: '{start_text}'.") 9873 break 9874 query = parsed_query 9875 else: 9876 query = parser(self, query) 9877 9878 return query 9879 9880 def _parse_declareitem(self) -> exp.DeclareItem | None: 9881 self._match_texts(("VAR", "VARIABLE")) 9882 9883 vars = self._parse_csv(self._parse_id_var) 9884 if not vars: 9885 return None 9886 9887 self._match(TokenType.ALIAS) 9888 kind = self._parse_schema() if self._match(TokenType.TABLE) else self._parse_types() 9889 default = ( 9890 self._match(TokenType.DEFAULT) or self._match(TokenType.EQ) 9891 ) and self._parse_bitwise() 9892 9893 return self.expression(exp.DeclareItem(this=vars, kind=kind, default=default)) 9894 9895 def _parse_declare(self) -> exp.Declare | exp.Command: 9896 start = self._prev 9897 replace = self._match_text_seq("OR", "REPLACE") 9898 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 9899 9900 if not expressions or self._curr: 9901 return self._parse_as_command(start) 9902 9903 return self.expression(exp.Declare(expressions=expressions, replace=replace)) 9904 9905 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 9906 exp_class = exp.Cast if strict else exp.TryCast 9907 9908 if exp_class == exp.TryCast: 9909 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 9910 9911 return self.expression(exp_class(**kwargs)) 9912 9913 def _parse_json_value(self) -> exp.JSONValue: 9914 this = self._parse_bitwise() 9915 self._match(TokenType.COMMA) 9916 path = self._parse_bitwise() 9917 9918 returning = self._match(TokenType.RETURNING) and self._parse_type() 9919 9920 return self.expression( 9921 exp.JSONValue( 9922 this=this, 9923 path=self.dialect.to_json_path(path), 9924 returning=returning, 9925 
on_condition=self._parse_on_condition(), 9926 ) 9927 ) 9928 9929 def _parse_group_concat(self) -> exp.Expr | None: 9930 def concat_exprs(node: exp.Expr | None, exprs: list[exp.Expr]) -> exp.Expr: 9931 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 9932 concat_exprs = [ 9933 self.expression( 9934 exp.Concat( 9935 expressions=node.expressions, 9936 safe=True, 9937 coalesce=self.dialect.CONCAT_COALESCE, 9938 ) 9939 ) 9940 ] 9941 node.set("expressions", concat_exprs) 9942 return node 9943 if len(exprs) == 1: 9944 return exprs[0] 9945 return self.expression( 9946 exp.Concat(expressions=args, safe=True, coalesce=self.dialect.CONCAT_COALESCE) 9947 ) 9948 9949 args = self._parse_csv(self._parse_lambda) 9950 9951 if args: 9952 order = args[-1] if isinstance(args[-1], exp.Order) else None 9953 9954 if order: 9955 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 9956 # remove 'expr' from exp.Order and add it back to args 9957 args[-1] = order.this 9958 order.set("this", concat_exprs(order.this, args)) 9959 9960 this = order or concat_exprs(args[0], args) 9961 else: 9962 this = None 9963 9964 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 9965 9966 return self.expression(exp.GroupConcat(this=this, separator=separator)) 9967 9968 def _parse_initcap(self) -> exp.Initcap: 9969 expr = exp.Initcap.from_arg_list(self._parse_function_args()) 9970 9971 # attach dialect's default delimiters 9972 if expr.args.get("expression") is None: 9973 expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)) 9974 9975 return expr 9976 9977 def _parse_operator(self, this: exp.Expr | None) -> exp.Expr | None: 9978 while True: 9979 if not self._match(TokenType.L_PAREN): 9980 break 9981 9982 op = "" 9983 while self._curr and not self._match(TokenType.R_PAREN): 9984 op += self._curr.text 9985 self._advance() 9986 9987 comments = self._prev_comments 9988 this = self.expression( 
9989 exp.Operator(this=this, operator=op, expression=self._parse_bitwise()), 9990 comments=comments, 9991 ) 9992 9993 if not self._match(TokenType.OPERATOR): 9994 break 9995 9996 return this
def build_var_map(args: BuilderArgs) -> exp.StarMap | exp.VarMap:
    """Build a map node from a flat, alternating key/value argument list.

    A single argument whose ``is_star`` attribute is truthy is wrapped in an
    ``exp.StarMap``. Otherwise the arguments are read pairwise: even positions
    become keys and odd positions become values of an ``exp.VarMap``.

    Args:
        args: Flat argument list laid out as ``key1, value1, key2, value2, ...``.

    Returns:
        ``exp.StarMap`` for the lone-star form, otherwise ``exp.VarMap``.

    Raises:
        IndexError: If the final key has no matching value (odd argument count).
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Pair up neighbours; indexing (rather than slicing) keeps the IndexError
    # on a dangling key.
    pairs = [(args[pos], args[pos + 1]) for pos in range(0, len(args), 2)]
    keys: list[ExpOrStr] = [key for key, _ in pairs]
    values: list[ExpOrStr] = [value for _, value in pairs]

    key_array = exp.array(*keys, copy=False)
    value_array = exp.array(*values, copy=False)
    return exp.VarMap(keys=key_array, values=value_array)
def binary_range_parser(
    expr_type: Type[exp.Expr], reverse_args: bool = False
) -> t.Callable[[Parser, exp.Expr | None], exp.Expr | None]:
    """Create a parser callback for a binary, range-style operator.

    The returned callback parses the operand that follows the operator with
    ``_parse_bitwise``, builds ``expr_type(this=..., expression=...)``, passes
    the node through ``self.expression`` and finally through
    ``self._parse_escape``.

    Args:
        expr_type: Node class (or callable) invoked with ``this`` and
            ``expression`` keyword arguments.
        reverse_args: When true, the freshly parsed operand becomes ``this``
            and the incoming operand becomes ``expression``.

    Returns:
        A callback taking ``(parser, this)`` and returning the built node.
    """

    def _parse_binary_range(self: Parser, this: exp.Expr | None) -> exp.Expr | None:
        parsed_operand = self._parse_bitwise()

        if reverse_args:
            left, right = parsed_operand, this
        else:
            left, right = this, parsed_operand

        node = self.expression(expr_type(this=left, expression=right))
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: BuilderArgs, dialect: Dialect) -> exp.Func:
    """Build a logarithm node from one or two arguments.

    With two arguments the default order is (base, expression); the operands
    are swapped when ``dialect.LOG_BASE_FIRST`` is falsy. With one argument
    the result is ``exp.Ln`` if the dialect's parser class sets
    ``LOG_DEFAULTS_TO_LN``, otherwise ``exp.Log``.

    Args:
        args: Function arguments.
        dialect: Dialect providing ``LOG_BASE_FIRST`` and ``parser_class``.

    Returns:
        An ``exp.Log`` or ``exp.Ln`` node.
    """
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if not second:
        # Single-argument form: the dialect decides between LN and LOG.
        unary_type = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return unary_type(this=first)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=first, expression=second)

    return exp.Log(this=second, expression=first)
def build_extract_json_with_path(
    expr_type: Type[E],
) -> t.Callable[[BuilderArgs, Dialect], E]:
    """Create a builder for JSON extraction nodes that take a path argument.

    The returned builder uses the first argument as ``this`` and converts the
    second argument with ``dialect.to_json_path`` for ``expression``.

    Args:
        expr_type: Node class to instantiate.

    Returns:
        A builder taking ``(args, dialect)`` and returning an ``expr_type`` node.
    """

    def _builder(args: BuilderArgs, dialect: Dialect) -> E:
        subject = seq_get(args, 0)
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=subject, expression=json_path)

        # Only JSONExtract keeps arguments beyond the path.
        has_extra_args = len(args) > 2
        if has_extra_args and expr_type is exp.JSONExtract:
            node.set("expressions", args[2:])

        # The scalar variant records the dialect's scalar-only setting.
        if expr_type is exp.JSONExtractScalar:
            node.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY)

        return node

    return _builder
def build_mod(args: BuilderArgs) -> exp.Mod:
    """Build an ``exp.Mod`` node from the first two arguments.

    Operands that are ``exp.Binary`` nodes are wrapped in ``exp.Paren``,
    e.g. MOD(a + 1, 7) -> (a + 1) % 7.

    Args:
        args: Function arguments [dividend, divisor].

    Returns:
        The ``exp.Mod`` node.
    """

    def _parenthesize(operand):
        # Leave non-binary operands (including a missing one) untouched.
        if isinstance(operand, exp.Binary):
            return exp.Paren(this=operand)
        return operand

    dividend = seq_get(args, 0)
    divisor = seq_get(args, 1)

    return exp.Mod(this=_parenthesize(dividend), expression=_parenthesize(divisor))
def build_array_constructor(
    exp_class: Type[E], args: list[t.Any], bracket_kind: TokenType, dialect: Dialect
) -> exp.Expr:
    """Instantiate an array-like node from its element expressions.

    When ``exp_class`` is ``exp.Array`` and the dialect sets
    ``HAS_DISTINCT_ARRAY_CONSTRUCTORS``, the node also gets a
    ``bracket_notation`` flag that is true for ``TokenType.L_BRACKET``.

    Args:
        exp_class: Node class to instantiate with ``expressions=args``.
        args: Element expressions.
        bracket_kind: Token type that opened the constructor.
        dialect: Dialect providing ``HAS_DISTINCT_ARRAY_CONSTRUCTORS``.

    Returns:
        The constructed node.
    """
    node = exp_class(expressions=args)

    if not (exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS):
        return node

    uses_brackets = bracket_kind == TokenType.L_BRACKET
    node.set("bracket_notation", uses_brackets)
    return node
def build_convert_timezone(
    args: BuilderArgs, default_source_tz: str | None = None
) -> exp.ConvertTimezone | exp.Anonymous:
    """Build an ``exp.ConvertTimezone`` node.

    With exactly two arguments they are read as (target_tz, timestamp), and
    the source time zone is a string literal of ``default_source_tz`` when
    that is truthy, otherwise ``None``. Any other argument count is passed to
    ``exp.ConvertTimezone.from_arg_list``.

    Args:
        args: Function arguments.
        default_source_tz: Source time zone used by the two-argument form.

    Returns:
        The node built for the given arguments.
    """
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = None
    if default_source_tz:
        source_tz = exp.Literal.string(default_source_tz)

    target_tz = seq_get(args, 0)
    timestamp = seq_get(args, 1)
    return exp.ConvertTimezone(source_tz=source_tz, target_tz=target_tz, timestamp=timestamp)
def build_trim(args: BuilderArgs, is_left: bool = True, reverse_args: bool = False) -> exp.Trim:
    """Build an ``exp.Trim`` node for a one-sided trim.

    Args:
        args: Function arguments; the first two are used.
        is_left: Selects the ``"LEADING"`` position when true, else ``"TRAILING"``.
        reverse_args: Swap the two operands, but only when the second one is
            truthy.

    Returns:
        The ``exp.Trim`` node.
    """
    first = seq_get(args, 0)
    second = seq_get(args, 1)
    position = "LEADING" if is_left else "TRAILING"

    if second and reverse_args:
        return exp.Trim(this=second, expression=first, position=position)

    return exp.Trim(this=first, expression=second, position=position)
def build_array_append(args: BuilderArgs, dialect: Dialect) -> exp.ArrayAppend:
    """
    Create an ArrayAppend node whose NULL handling follows the dialect.

    Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL.
    Others (DuckDB, PostgreSQL) create a new single-element array instead.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect whose ARRAY_FUNCS_PROPAGATES_NULLS setting is copied
            onto the node

    Returns:
        ArrayAppend expression carrying the dialect's null_propagation flag
    """
    array = seq_get(args, 0)
    element = seq_get(args, 1)
    propagates_nulls = dialect.ARRAY_FUNCS_PROPAGATES_NULLS

    return exp.ArrayAppend(this=array, expression=element, null_propagation=propagates_nulls)
Builds ArrayAppend with NULL propagation semantics based on the dialect configuration.
Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. Others (DuckDB, PostgreSQL) create a new single-element array instead.
Arguments:
- args: Function arguments [array, element]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayAppend expression with appropriate null_propagation flag
def build_array_prepend(args: BuilderArgs, dialect: Dialect) -> exp.ArrayPrepend:
    """
    Create an ArrayPrepend node whose NULL handling follows the dialect.

    Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL.
    Others (DuckDB, PostgreSQL) create a new single-element array instead.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect whose ARRAY_FUNCS_PROPAGATES_NULLS setting is copied
            onto the node

    Returns:
        ArrayPrepend expression carrying the dialect's null_propagation flag
    """
    array = seq_get(args, 0)
    element = seq_get(args, 1)
    propagates_nulls = dialect.ARRAY_FUNCS_PROPAGATES_NULLS

    return exp.ArrayPrepend(this=array, expression=element, null_propagation=propagates_nulls)
Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration.
Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. Others (DuckDB, PostgreSQL) create a new single-element array instead.
Arguments:
- args: Function arguments [array, element]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayPrepend expression with appropriate null_propagation flag
def build_array_concat(args: BuilderArgs, dialect: Dialect) -> exp.ArrayConcat:
    """
    Create an ArrayConcat node whose NULL handling follows the dialect.

    Some dialects (Redshift, Snowflake) return NULL when any input array is NULL.
    Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation.

    Args:
        args: Function arguments [array1, array2, ...] (variadic)
        dialect: The dialect whose ARRAY_FUNCS_PROPAGATES_NULLS setting is copied
            onto the node

    Returns:
        ArrayConcat expression carrying the dialect's null_propagation flag
    """
    # The first array is the node's subject; all remaining arrays follow it.
    leading_array = seq_get(args, 0)
    remaining_arrays = args[1:]
    propagates_nulls = dialect.ARRAY_FUNCS_PROPAGATES_NULLS

    return exp.ArrayConcat(
        this=leading_array,
        expressions=remaining_arrays,
        null_propagation=propagates_nulls,
    )
Builds ArrayConcat with NULL propagation semantics based on the dialect configuration.
Some dialects (Redshift, Snowflake) return NULL when any input array is NULL. Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation.
Arguments:
- args: Function arguments [array1, array2, ...] (variadic)
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayConcat expression with appropriate null_propagation flag
def build_array_remove(args: BuilderArgs, dialect: Dialect) -> exp.ArrayRemove:
    """
    Create an ArrayRemove node whose NULL handling follows the dialect.

    Some dialects (Snowflake) return NULL when the removal value is NULL.
    Others (DuckDB) may return an empty array due to NULL comparison semantics.

    Args:
        args: Function arguments [array, value_to_remove]
        dialect: The dialect whose ARRAY_FUNCS_PROPAGATES_NULLS setting is copied
            onto the node

    Returns:
        ArrayRemove expression carrying the dialect's null_propagation flag
    """
    array = seq_get(args, 0)
    value_to_remove = seq_get(args, 1)
    propagates_nulls = dialect.ARRAY_FUNCS_PROPAGATES_NULLS

    return exp.ArrayRemove(
        this=array,
        expression=value_to_remove,
        null_propagation=propagates_nulls,
    )
Builds ArrayRemove with NULL propagation semantics based on the dialect configuration.
Some dialects (Snowflake) return NULL when the removal value is NULL. Others (DuckDB) may return an empty array due to NULL comparison semantics.
Arguments:
- args: Function arguments [array, value_to_remove]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayRemove expression with appropriate null_propagation flag
291class Parser: 292 """ 293 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 294 295 Args: 296 error_level: The desired error level. 297 Default: ErrorLevel.IMMEDIATE 298 error_message_context: The amount of context to capture from a query string when displaying 299 the error message (in number of characters). 300 Default: 100 301 max_errors: Maximum number of error messages to include in a raised ParseError. 302 This is only relevant if error_level is ErrorLevel.RAISE. 303 Default: 3 304 max_nodes: Maximum number of AST nodes to prevent memory exhaustion. 305 Set to -1 (default) to disable the check. 306 """ 307 308 __slots__ = ( 309 "error_level", 310 "error_message_context", 311 "max_errors", 312 "max_nodes", 313 "dialect", 314 "sql", 315 "errors", 316 "_tokens", 317 "_index", 318 "_curr", 319 "_next", 320 "_prev", 321 "_prev_comments", 322 "_pipe_cte_counter", 323 "_chunks", 324 "_chunk_index", 325 "_tokens_size", 326 "_node_count", 327 ) 328 329 FUNCTIONS: t.ClassVar[dict[str, t.Callable]] = { 330 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 331 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 332 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 333 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 334 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 335 ), 336 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 337 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 338 ), 339 "ARRAY_APPEND": build_array_append, 340 "ARRAY_CAT": build_array_concat, 341 "ARRAY_CONCAT": build_array_concat, 342 "ARRAY_INTERSECT": lambda args: exp.ArrayIntersect(expressions=args), 343 "ARRAY_INTERSECTION": lambda args: exp.ArrayIntersect(expressions=args), 344 "ARRAY_PREPEND": build_array_prepend, 345 "ARRAY_REMOVE": build_array_remove, 346 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), 
expressions=args[1:], big_int=True), 347 "CONCAT": lambda args, dialect: exp.Concat( 348 expressions=args, 349 safe=not dialect.STRICT_STRING_CONCAT, 350 coalesce=dialect.CONCAT_COALESCE, 351 ), 352 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 353 expressions=args, 354 safe=not dialect.STRICT_STRING_CONCAT, 355 coalesce=dialect.CONCAT_WS_COALESCE, 356 ), 357 "CONVERT_TIMEZONE": build_convert_timezone, 358 "DATE_TO_DATE_STR": lambda args: exp.Cast( 359 this=seq_get(args, 0), 360 to=exp.DataType(this=exp.DType.TEXT), 361 ), 362 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 363 start=seq_get(args, 0), 364 end=seq_get(args, 1), 365 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 366 ), 367 "GENERATE_UUID": lambda args, dialect: exp.Uuid( 368 is_string=dialect.UUID_IS_STRING_TYPE or None 369 ), 370 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 371 "GREATEST": lambda args, dialect: exp.Greatest( 372 this=seq_get(args, 0), 373 expressions=args[1:], 374 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 375 ), 376 "LEAST": lambda args, dialect: exp.Least( 377 this=seq_get(args, 0), 378 expressions=args[1:], 379 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 380 ), 381 "HEX": build_hex, 382 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 383 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 384 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 385 "JSON_KEYS": lambda args, dialect: exp.JSONKeys( 386 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 387 ), 388 "LIKE": build_like, 389 "LOG": build_logarithm, 390 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 391 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 392 "LOWER": build_lower, 393 "LPAD": lambda args: build_pad(args), 394 "LEFTPAD": lambda args: 
build_pad(args), 395 "LTRIM": lambda args: build_trim(args), 396 "MOD": build_mod, 397 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 398 "RPAD": lambda args: build_pad(args, is_left=False), 399 "RTRIM": lambda args: build_trim(args, is_left=False), 400 "SCOPE_RESOLUTION": lambda args: ( 401 exp.ScopeResolution(expression=seq_get(args, 0)) 402 if len(args) != 2 403 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)) 404 ), 405 "STRPOS": exp.StrPosition.from_arg_list, 406 "CHARINDEX": lambda args: build_locate_strposition(args), 407 "INSTR": exp.StrPosition.from_arg_list, 408 "LOCATE": lambda args: build_locate_strposition(args), 409 "TIME_TO_TIME_STR": lambda args: exp.Cast( 410 this=seq_get(args, 0), 411 to=exp.DataType(this=exp.DType.TEXT), 412 ), 413 "TO_HEX": build_hex, 414 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 415 this=exp.Cast( 416 this=seq_get(args, 0), 417 to=exp.DataType(this=exp.DType.TEXT), 418 ), 419 start=exp.Literal.number(1), 420 length=exp.Literal.number(10), 421 ), 422 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 423 "UPPER": build_upper, 424 "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None), 425 "UUID_STRING": lambda args, dialect: exp.Uuid( 426 this=seq_get(args, 0), 427 name=seq_get(args, 1), 428 is_string=dialect.UUID_IS_STRING_TYPE or None, 429 ), 430 "VAR_MAP": build_var_map, 431 } 432 433 NO_PAREN_FUNCTIONS: t.ClassVar[dict] = { 434 TokenType.CURRENT_DATE: exp.CurrentDate, 435 TokenType.CURRENT_DATETIME: exp.CurrentDate, 436 TokenType.CURRENT_TIME: exp.CurrentTime, 437 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 438 TokenType.CURRENT_USER: exp.CurrentUser, 439 TokenType.CURRENT_ROLE: exp.CurrentRole, 440 } 441 442 STRUCT_TYPE_TOKENS: t.ClassVar = { 443 TokenType.NESTED, 444 TokenType.OBJECT, 445 TokenType.STRUCT, 446 TokenType.UNION, 447 } 448 449 NESTED_TYPE_TOKENS: t.ClassVar = { 450 TokenType.ARRAY, 451 
TokenType.LIST, 452 TokenType.LOWCARDINALITY, 453 TokenType.MAP, 454 TokenType.NULLABLE, 455 TokenType.RANGE, 456 *STRUCT_TYPE_TOKENS, 457 } 458 459 ENUM_TYPE_TOKENS: t.ClassVar = { 460 TokenType.DYNAMIC, 461 TokenType.ENUM, 462 TokenType.ENUM8, 463 TokenType.ENUM16, 464 } 465 466 AGGREGATE_TYPE_TOKENS: t.ClassVar = { 467 TokenType.AGGREGATEFUNCTION, 468 TokenType.SIMPLEAGGREGATEFUNCTION, 469 } 470 471 TYPE_TOKENS: t.ClassVar = { 472 TokenType.BIT, 473 TokenType.BOOLEAN, 474 TokenType.TINYINT, 475 TokenType.UTINYINT, 476 TokenType.SMALLINT, 477 TokenType.USMALLINT, 478 TokenType.INT, 479 TokenType.UINT, 480 TokenType.BIGINT, 481 TokenType.UBIGINT, 482 TokenType.BIGNUM, 483 TokenType.INT128, 484 TokenType.UINT128, 485 TokenType.INT256, 486 TokenType.UINT256, 487 TokenType.MEDIUMINT, 488 TokenType.UMEDIUMINT, 489 TokenType.FIXEDSTRING, 490 TokenType.FLOAT, 491 TokenType.DOUBLE, 492 TokenType.UDOUBLE, 493 TokenType.CHAR, 494 TokenType.NCHAR, 495 TokenType.VARCHAR, 496 TokenType.NVARCHAR, 497 TokenType.BPCHAR, 498 TokenType.TEXT, 499 TokenType.MEDIUMTEXT, 500 TokenType.LONGTEXT, 501 TokenType.BLOB, 502 TokenType.MEDIUMBLOB, 503 TokenType.LONGBLOB, 504 TokenType.BINARY, 505 TokenType.VARBINARY, 506 TokenType.JSON, 507 TokenType.JSONB, 508 TokenType.INTERVAL, 509 TokenType.TINYBLOB, 510 TokenType.TINYTEXT, 511 TokenType.TIME, 512 TokenType.TIMETZ, 513 TokenType.TIME_NS, 514 TokenType.TIMESTAMP, 515 TokenType.TIMESTAMP_S, 516 TokenType.TIMESTAMP_MS, 517 TokenType.TIMESTAMP_NS, 518 TokenType.TIMESTAMPTZ, 519 TokenType.TIMESTAMPLTZ, 520 TokenType.TIMESTAMPNTZ, 521 TokenType.DATETIME, 522 TokenType.DATETIME2, 523 TokenType.DATETIME64, 524 TokenType.SMALLDATETIME, 525 TokenType.DATE, 526 TokenType.DATE32, 527 TokenType.INT4RANGE, 528 TokenType.INT4MULTIRANGE, 529 TokenType.INT8RANGE, 530 TokenType.INT8MULTIRANGE, 531 TokenType.NUMRANGE, 532 TokenType.NUMMULTIRANGE, 533 TokenType.TSRANGE, 534 TokenType.TSMULTIRANGE, 535 TokenType.TSTZRANGE, 536 TokenType.TSTZMULTIRANGE, 537 
TokenType.DATERANGE, 538 TokenType.DATEMULTIRANGE, 539 TokenType.DECIMAL, 540 TokenType.DECIMAL32, 541 TokenType.DECIMAL64, 542 TokenType.DECIMAL128, 543 TokenType.DECIMAL256, 544 TokenType.DECFLOAT, 545 TokenType.UDECIMAL, 546 TokenType.BIGDECIMAL, 547 TokenType.UUID, 548 TokenType.GEOGRAPHY, 549 TokenType.GEOGRAPHYPOINT, 550 TokenType.GEOMETRY, 551 TokenType.POINT, 552 TokenType.RING, 553 TokenType.LINESTRING, 554 TokenType.MULTILINESTRING, 555 TokenType.POLYGON, 556 TokenType.MULTIPOLYGON, 557 TokenType.HLLSKETCH, 558 TokenType.HSTORE, 559 TokenType.PSEUDO_TYPE, 560 TokenType.SUPER, 561 TokenType.SERIAL, 562 TokenType.SMALLSERIAL, 563 TokenType.BIGSERIAL, 564 TokenType.XML, 565 TokenType.YEAR, 566 TokenType.USERDEFINED, 567 TokenType.MONEY, 568 TokenType.SMALLMONEY, 569 TokenType.ROWVERSION, 570 TokenType.IMAGE, 571 TokenType.VARIANT, 572 TokenType.VECTOR, 573 TokenType.VOID, 574 TokenType.OBJECT, 575 TokenType.OBJECT_IDENTIFIER, 576 TokenType.INET, 577 TokenType.IPADDRESS, 578 TokenType.IPPREFIX, 579 TokenType.IPV4, 580 TokenType.IPV6, 581 TokenType.UNKNOWN, 582 TokenType.NOTHING, 583 TokenType.NULL, 584 TokenType.NAME, 585 TokenType.TDIGEST, 586 TokenType.DYNAMIC, 587 *ENUM_TYPE_TOKENS, 588 *NESTED_TYPE_TOKENS, 589 *AGGREGATE_TYPE_TOKENS, 590 } 591 592 SIGNED_TO_UNSIGNED_TYPE_TOKEN: t.ClassVar = { 593 TokenType.BIGINT: TokenType.UBIGINT, 594 TokenType.INT: TokenType.UINT, 595 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 596 TokenType.SMALLINT: TokenType.USMALLINT, 597 TokenType.TINYINT: TokenType.UTINYINT, 598 TokenType.DECIMAL: TokenType.UDECIMAL, 599 TokenType.DOUBLE: TokenType.UDOUBLE, 600 } 601 602 SUBQUERY_PREDICATES: t.ClassVar = { 603 TokenType.ANY: exp.Any, 604 TokenType.ALL: exp.All, 605 TokenType.EXISTS: exp.Exists, 606 TokenType.SOME: exp.Any, 607 } 608 609 SUBQUERY_TOKENS: t.ClassVar = { 610 TokenType.SELECT, 611 TokenType.WITH, 612 TokenType.FROM, 613 } 614 615 RESERVED_TOKENS: t.ClassVar = { 616 *Tokenizer.SINGLE_TOKENS.values(), 617 
TokenType.SELECT, 618 } - {TokenType.IDENTIFIER} 619 620 DB_CREATABLES: t.ClassVar = { 621 TokenType.DATABASE, 622 TokenType.DICTIONARY, 623 TokenType.FILE_FORMAT, 624 TokenType.MODEL, 625 TokenType.NAMESPACE, 626 TokenType.SCHEMA, 627 TokenType.SEMANTIC_VIEW, 628 TokenType.SEQUENCE, 629 TokenType.SINK, 630 TokenType.SOURCE, 631 TokenType.STAGE, 632 TokenType.STORAGE_INTEGRATION, 633 TokenType.STREAMLIT, 634 TokenType.TABLE, 635 TokenType.TAG, 636 TokenType.VIEW, 637 TokenType.WAREHOUSE, 638 } 639 640 CREATABLES: t.ClassVar = { 641 TokenType.COLUMN, 642 TokenType.CONSTRAINT, 643 TokenType.FOREIGN_KEY, 644 TokenType.FUNCTION, 645 TokenType.INDEX, 646 TokenType.PROCEDURE, 647 TokenType.TRIGGER, 648 TokenType.TYPE, 649 *DB_CREATABLES, 650 } 651 652 TRIGGER_EVENTS: t.ClassVar = { 653 TokenType.INSERT, 654 TokenType.UPDATE, 655 TokenType.DELETE, 656 TokenType.TRUNCATE, 657 } 658 659 ALTERABLES: t.ClassVar = { 660 TokenType.INDEX, 661 TokenType.TABLE, 662 TokenType.VIEW, 663 TokenType.SESSION, 664 } 665 666 # Tokens that can represent identifiers 667 ID_VAR_TOKENS: t.ClassVar[set] = { 668 TokenType.ALL, 669 TokenType.ANALYZE, 670 TokenType.ATTACH, 671 TokenType.VAR, 672 TokenType.ANTI, 673 TokenType.APPLY, 674 TokenType.ASC, 675 TokenType.ASOF, 676 TokenType.AUTO_INCREMENT, 677 TokenType.BEGIN, 678 TokenType.BPCHAR, 679 TokenType.CACHE, 680 TokenType.CASE, 681 TokenType.COLLATE, 682 TokenType.COMMAND, 683 TokenType.COMMENT, 684 TokenType.COMMIT, 685 TokenType.CONSTRAINT, 686 TokenType.COPY, 687 TokenType.CUBE, 688 TokenType.CURRENT_SCHEMA, 689 TokenType.DEFAULT, 690 TokenType.DELETE, 691 TokenType.DESC, 692 TokenType.DESCRIBE, 693 TokenType.DETACH, 694 TokenType.DICTIONARY, 695 TokenType.DIV, 696 TokenType.END, 697 TokenType.EXECUTE, 698 TokenType.EXPORT, 699 TokenType.ESCAPE, 700 TokenType.FALSE, 701 TokenType.FIRST, 702 TokenType.FILE, 703 TokenType.FILTER, 704 TokenType.FINAL, 705 TokenType.FORMAT, 706 TokenType.FULL, 707 TokenType.GET, 708 TokenType.IDENTIFIER, 709 
TokenType.INOUT, 710 TokenType.IS, 711 TokenType.ISNULL, 712 TokenType.INTERVAL, 713 TokenType.KEEP, 714 TokenType.KILL, 715 TokenType.LEFT, 716 TokenType.LIMIT, 717 TokenType.LOAD, 718 TokenType.LOCK, 719 TokenType.MATCH, 720 TokenType.MERGE, 721 TokenType.NATURAL, 722 TokenType.NEXT, 723 TokenType.OFFSET, 724 TokenType.OPERATOR, 725 TokenType.ORDINALITY, 726 TokenType.OVER, 727 TokenType.OVERLAPS, 728 TokenType.OVERWRITE, 729 TokenType.PARTITION, 730 TokenType.PERCENT, 731 TokenType.PIVOT, 732 TokenType.PRAGMA, 733 TokenType.PUT, 734 TokenType.RANGE, 735 TokenType.RECURSIVE, 736 TokenType.REFERENCES, 737 TokenType.REFRESH, 738 TokenType.RENAME, 739 TokenType.REPLACE, 740 TokenType.RIGHT, 741 TokenType.ROLLUP, 742 TokenType.ROW, 743 TokenType.ROWS, 744 TokenType.SEMI, 745 TokenType.SET, 746 TokenType.SETTINGS, 747 TokenType.SHOW, 748 TokenType.STREAM, 749 TokenType.STREAMLIT, 750 TokenType.TEMPORARY, 751 TokenType.TOP, 752 TokenType.TRUE, 753 TokenType.TRUNCATE, 754 TokenType.UNIQUE, 755 TokenType.UNNEST, 756 TokenType.UNPIVOT, 757 TokenType.UPDATE, 758 TokenType.USE, 759 TokenType.VOLATILE, 760 TokenType.WINDOW, 761 TokenType.CURRENT_CATALOG, 762 TokenType.LOCALTIME, 763 TokenType.LOCALTIMESTAMP, 764 TokenType.SESSION_USER, 765 TokenType.STRAIGHT_JOIN, 766 *ALTERABLES, 767 *CREATABLES, 768 *SUBQUERY_PREDICATES, 769 *TYPE_TOKENS, 770 *NO_PAREN_FUNCTIONS, 771 } - {TokenType.UNION} 772 773 TABLE_ALIAS_TOKENS: t.ClassVar[set] = ID_VAR_TOKENS - { 774 TokenType.ANTI, 775 TokenType.ASOF, 776 TokenType.FULL, 777 TokenType.LEFT, 778 TokenType.LOCK, 779 TokenType.NATURAL, 780 TokenType.RIGHT, 781 TokenType.SEMI, 782 TokenType.WINDOW, 783 } 784 785 ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS 786 787 COLON_PLACEHOLDER_TOKENS: t.ClassVar = ID_VAR_TOKENS 788 789 ARRAY_CONSTRUCTORS: t.ClassVar = { 790 "ARRAY": exp.Array, 791 "LIST": exp.List, 792 } 793 794 COMMENT_TABLE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.IS} 795 796 UPDATE_ALIAS_TOKENS: t.ClassVar = 
TABLE_ALIAS_TOKENS - {TokenType.SET} 797 798 TRIM_TYPES: t.ClassVar = {"LEADING", "TRAILING", "BOTH"} 799 800 # Tokens that indicate a simple column reference 801 IDENTIFIER_TOKENS: t.ClassVar[frozenset] = frozenset({TokenType.VAR, TokenType.IDENTIFIER}) 802 803 BRACKETS: t.ClassVar[frozenset] = frozenset({TokenType.L_BRACKET, TokenType.L_BRACE}) 804 805 # Postfix tokens that prevent the bare column fast path 806 COLUMN_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 807 { 808 TokenType.L_PAREN, 809 TokenType.L_BRACKET, 810 TokenType.L_BRACE, 811 TokenType.COLON, 812 TokenType.JOIN_MARKER, 813 } 814 ) 815 816 TABLE_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 817 { 818 TokenType.L_PAREN, 819 TokenType.L_BRACKET, 820 TokenType.L_BRACE, 821 TokenType.PIVOT, 822 TokenType.UNPIVOT, 823 TokenType.TABLE_SAMPLE, 824 } 825 ) 826 827 FUNC_TOKENS: t.ClassVar = { 828 TokenType.COLLATE, 829 TokenType.COMMAND, 830 TokenType.CURRENT_DATE, 831 TokenType.CURRENT_DATETIME, 832 TokenType.CURRENT_SCHEMA, 833 TokenType.CURRENT_TIMESTAMP, 834 TokenType.CURRENT_TIME, 835 TokenType.CURRENT_USER, 836 TokenType.CURRENT_CATALOG, 837 TokenType.FILTER, 838 TokenType.FIRST, 839 TokenType.FORMAT, 840 TokenType.GET, 841 TokenType.GLOB, 842 TokenType.IDENTIFIER, 843 TokenType.INDEX, 844 TokenType.ISNULL, 845 TokenType.ILIKE, 846 TokenType.INSERT, 847 TokenType.LIKE, 848 TokenType.LOCALTIME, 849 TokenType.LOCALTIMESTAMP, 850 TokenType.MERGE, 851 TokenType.NEXT, 852 TokenType.OFFSET, 853 TokenType.PRIMARY_KEY, 854 TokenType.RANGE, 855 TokenType.REPLACE, 856 TokenType.RLIKE, 857 TokenType.ROW, 858 TokenType.SESSION_USER, 859 TokenType.UNNEST, 860 TokenType.VAR, 861 TokenType.LEFT, 862 TokenType.RIGHT, 863 TokenType.SEQUENCE, 864 TokenType.DATE, 865 TokenType.DATETIME, 866 TokenType.TABLE, 867 TokenType.TIMESTAMP, 868 TokenType.TIMESTAMPTZ, 869 TokenType.TRUNCATE, 870 TokenType.UTC_DATE, 871 TokenType.UTC_TIME, 872 TokenType.UTC_TIMESTAMP, 873 TokenType.WINDOW, 874 TokenType.XOR, 875 
*TYPE_TOKENS, 876 *SUBQUERY_PREDICATES, 877 } 878 879 CONJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 880 TokenType.AND: exp.And, 881 } 882 883 ASSIGNMENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 884 TokenType.COLON_EQ: exp.PropertyEQ, 885 } 886 887 DISJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 888 TokenType.OR: exp.Or, 889 } 890 891 EQUALITY: t.ClassVar = { 892 TokenType.EQ: exp.EQ, 893 TokenType.NEQ: exp.NEQ, 894 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 895 } 896 897 COMPARISON: t.ClassVar = { 898 TokenType.GT: exp.GT, 899 TokenType.GTE: exp.GTE, 900 TokenType.LT: exp.LT, 901 TokenType.LTE: exp.LTE, 902 } 903 904 BITWISE: t.ClassVar = { 905 TokenType.AMP: exp.BitwiseAnd, 906 TokenType.CARET: exp.BitwiseXor, 907 TokenType.PIPE: exp.BitwiseOr, 908 } 909 910 TERM: t.ClassVar = { 911 TokenType.DASH: exp.Sub, 912 TokenType.PLUS: exp.Add, 913 TokenType.MOD: exp.Mod, 914 TokenType.COLLATE: exp.Collate, 915 } 916 917 FACTOR: t.ClassVar = { 918 TokenType.DIV: exp.IntDiv, 919 TokenType.LR_ARROW: exp.Distance, 920 TokenType.SLASH: exp.Div, 921 TokenType.STAR: exp.Mul, 922 } 923 924 EXPONENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = {} 925 926 TIMES: t.ClassVar = { 927 TokenType.TIME, 928 TokenType.TIMETZ, 929 } 930 931 TIMESTAMPS: t.ClassVar = { 932 TokenType.TIMESTAMP, 933 TokenType.TIMESTAMPNTZ, 934 TokenType.TIMESTAMPTZ, 935 TokenType.TIMESTAMPLTZ, 936 *TIMES, 937 } 938 939 SET_OPERATIONS: t.ClassVar = { 940 TokenType.UNION, 941 TokenType.INTERSECT, 942 TokenType.EXCEPT, 943 } 944 945 JOIN_METHODS: t.ClassVar = { 946 TokenType.ASOF, 947 TokenType.NATURAL, 948 TokenType.POSITIONAL, 949 } 950 951 JOIN_SIDES: t.ClassVar = { 952 TokenType.LEFT, 953 TokenType.RIGHT, 954 TokenType.FULL, 955 } 956 957 JOIN_KINDS: t.ClassVar = { 958 TokenType.ANTI, 959 TokenType.CROSS, 960 TokenType.INNER, 961 TokenType.OUTER, 962 TokenType.SEMI, 963 TokenType.STRAIGHT_JOIN, 964 } 965 966 JOIN_HINTS: t.ClassVar[set[str]] = set() 967 968 # Tokens that 
unambiguously end a table reference on the fast path 969 TABLE_TERMINATORS: t.ClassVar[frozenset] = frozenset( 970 { 971 TokenType.COMMA, 972 TokenType.GROUP_BY, 973 TokenType.HAVING, 974 TokenType.JOIN, 975 TokenType.LIMIT, 976 TokenType.ON, 977 TokenType.ORDER_BY, 978 TokenType.R_PAREN, 979 TokenType.SEMICOLON, 980 TokenType.SENTINEL, 981 TokenType.WHERE, 982 *SET_OPERATIONS, 983 *JOIN_KINDS, 984 *JOIN_METHODS, 985 *JOIN_SIDES, 986 } 987 ) 988 989 LAMBDAS: t.ClassVar = { 990 TokenType.ARROW: lambda self, expressions: self.expression( 991 exp.Lambda( 992 this=self._replace_lambda( 993 self._parse_disjunction(), 994 expressions, 995 ), 996 expressions=expressions, 997 ) 998 ), 999 TokenType.FARROW: lambda self, expressions: self.expression( 1000 exp.Kwarg(this=exp.var(expressions[0].name), expression=self._parse_disjunction()) 1001 ), 1002 } 1003 1004 # Whether lambda args include type annotations, e.g. TRANSFORM(arr, x INT -> x + 1) in Snowflake 1005 TYPED_LAMBDA_ARGS: t.ClassVar[bool] = False 1006 1007 LAMBDA_ARG_TERMINATORS: t.ClassVar[frozenset] = frozenset({TokenType.COMMA, TokenType.R_PAREN}) 1008 1009 COLUMN_OPERATORS: t.ClassVar = { 1010 TokenType.DOT: None, 1011 TokenType.DOTCOLON: lambda self, this, to: self.expression(exp.JSONCast(this=this, to=to)), 1012 TokenType.DCOLON: lambda self, this, to: self.build_cast( 1013 strict=self.STRICT_CAST, this=this, to=to 1014 ), 1015 TokenType.ARROW: lambda self, this, path: self.expression( 1016 exp.JSONExtract( 1017 this=this, 1018 expression=self.dialect.to_json_path(path), 1019 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1020 ) 1021 ), 1022 TokenType.DARROW: lambda self, this, path: self.expression( 1023 exp.JSONExtractScalar( 1024 this=this, 1025 expression=self.dialect.to_json_path(path), 1026 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1027 scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY, 1028 ) 1029 ), 1030 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 1031 
exp.JSONBExtract(this=this, expression=path) 1032 ), 1033 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 1034 exp.JSONBExtractScalar(this=this, expression=path) 1035 ), 1036 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 1037 exp.JSONBContains(this=this, expression=key) 1038 ), 1039 } 1040 1041 CAST_COLUMN_OPERATORS: t.ClassVar = { 1042 TokenType.DOTCOLON, 1043 TokenType.DCOLON, 1044 } 1045 1046 EXPRESSION_PARSERS: t.ClassVar = { 1047 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1048 exp.Column: lambda self: self._parse_column(), 1049 exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()), 1050 exp.Condition: lambda self: self._parse_disjunction(), 1051 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 1052 exp.Expr: lambda self: self._parse_expression(), 1053 exp.From: lambda self: self._parse_from(joins=True), 1054 exp.GrantPrincipal: lambda self: self._parse_grant_principal(), 1055 exp.GrantPrivilege: lambda self: self._parse_grant_privilege(), 1056 exp.Group: lambda self: self._parse_group(), 1057 exp.Having: lambda self: self._parse_having(), 1058 exp.Hint: lambda self: self._parse_hint_body(), 1059 exp.Identifier: lambda self: self._parse_id_var(), 1060 exp.Join: lambda self: self._parse_join(), 1061 exp.Lambda: lambda self: self._parse_lambda(), 1062 exp.Lateral: lambda self: self._parse_lateral(), 1063 exp.Limit: lambda self: self._parse_limit(), 1064 exp.Offset: lambda self: self._parse_offset(), 1065 exp.Order: lambda self: self._parse_order(), 1066 exp.Ordered: lambda self: self._parse_ordered(), 1067 exp.Properties: lambda self: self._parse_properties(), 1068 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 1069 exp.Qualify: lambda self: self._parse_qualify(), 1070 exp.Returning: lambda self: self._parse_returning(), 1071 exp.Select: lambda self: self._parse_select(), 1072 exp.Sort: lambda self: 
self._parse_sort(exp.Sort, TokenType.SORT_BY), 1073 exp.Table: lambda self: self._parse_table_parts(), 1074 exp.TableAlias: lambda self: self._parse_table_alias(), 1075 exp.Tuple: lambda self: self._parse_value(values=False), 1076 exp.Whens: lambda self: self._parse_when_matched(), 1077 exp.Where: lambda self: self._parse_where(), 1078 exp.Window: lambda self: self._parse_named_window(), 1079 exp.With: lambda self: self._parse_with(), 1080 } 1081 1082 STATEMENT_PARSERS: t.ClassVar = { 1083 TokenType.ALTER: lambda self: self._parse_alter(), 1084 TokenType.ANALYZE: lambda self: self._parse_analyze(), 1085 TokenType.BEGIN: lambda self: self._parse_transaction(), 1086 TokenType.CACHE: lambda self: self._parse_cache(), 1087 TokenType.COMMENT: lambda self: self._parse_comment(), 1088 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 1089 TokenType.COPY: lambda self: self._parse_copy(), 1090 TokenType.CREATE: lambda self: self._parse_create(), 1091 TokenType.DELETE: lambda self: self._parse_delete(), 1092 TokenType.DESC: lambda self: self._parse_describe(), 1093 TokenType.DESCRIBE: lambda self: self._parse_describe(), 1094 TokenType.DROP: lambda self: self._parse_drop(), 1095 TokenType.GRANT: lambda self: self._parse_grant(), 1096 TokenType.REVOKE: lambda self: self._parse_revoke(), 1097 TokenType.INSERT: lambda self: self._parse_insert(), 1098 TokenType.KILL: lambda self: self._parse_kill(), 1099 TokenType.LOAD: lambda self: self._parse_load(), 1100 TokenType.MERGE: lambda self: self._parse_merge(), 1101 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 1102 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma(this=self._parse_expression())), 1103 TokenType.REFRESH: lambda self: self._parse_refresh(), 1104 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 1105 TokenType.SET: lambda self: self._parse_set(), 1106 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 1107 TokenType.UNCACHE: lambda self: 
self._parse_uncache(), 1108 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 1109 TokenType.UPDATE: lambda self: self._parse_update(), 1110 TokenType.USE: lambda self: self._parse_use(), 1111 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 1112 } 1113 1114 UNARY_PARSERS: t.ClassVar = { 1115 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 1116 TokenType.NOT: lambda self: self.expression(exp.Not(this=self._parse_equality())), 1117 TokenType.TILDE: lambda self: self.expression(exp.BitwiseNot(this=self._parse_unary())), 1118 TokenType.DASH: lambda self: self.expression(exp.Neg(this=self._parse_unary())), 1119 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt(this=self._parse_unary())), 1120 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt(this=self._parse_unary())), 1121 } 1122 1123 STRING_PARSERS: t.ClassVar = { 1124 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 1125 exp.RawString(this=token.text), token 1126 ), 1127 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 1128 exp.National(this=token.text), token 1129 ), 1130 TokenType.RAW_STRING: lambda self, token: self.expression( 1131 exp.RawString(this=token.text), token 1132 ), 1133 TokenType.STRING: lambda self, token: self.expression( 1134 exp.Literal(this=token.text, is_string=True), token 1135 ), 1136 TokenType.UNICODE_STRING: lambda self, token: self.expression( 1137 exp.UnicodeString( 1138 this=token.text, escape=self._match_text_seq("UESCAPE") and self._parse_string() 1139 ), 1140 token, 1141 ), 1142 } 1143 1144 NUMERIC_PARSERS: t.ClassVar = { 1145 TokenType.BIT_STRING: lambda self, token: self.expression( 1146 exp.BitString(this=token.text), token 1147 ), 1148 TokenType.BYTE_STRING: lambda self, token: self.expression( 1149 exp.ByteString( 1150 this=token.text, is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None 1151 ), 1152 token, 1153 ), 1154 TokenType.HEX_STRING: lambda self, token: 
self.expression( 1155 exp.HexString( 1156 this=token.text, is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None 1157 ), 1158 token, 1159 ), 1160 TokenType.NUMBER: lambda self, token: self.expression( 1161 exp.Literal(this=token.text, is_string=False), token 1162 ), 1163 } 1164 1165 PRIMARY_PARSERS: t.ClassVar = { 1166 **STRING_PARSERS, 1167 **NUMERIC_PARSERS, 1168 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 1169 TokenType.NULL: lambda self, _: self.expression(exp.Null()), 1170 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean(this=True)), 1171 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean(this=False)), 1172 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 1173 TokenType.STAR: lambda self, _: self._parse_star_ops(), 1174 } 1175 1176 PLACEHOLDER_PARSERS: t.ClassVar = { 1177 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder()), 1178 TokenType.PARAMETER: lambda self: self._parse_parameter(), 1179 TokenType.COLON: lambda self: ( 1180 self.expression(exp.Placeholder(this=self._prev.text)) 1181 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 1182 else None 1183 ), 1184 } 1185 1186 RANGE_PARSERS: t.ClassVar = { 1187 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 1188 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 1189 TokenType.GLOB: binary_range_parser(exp.Glob), 1190 TokenType.ILIKE: binary_range_parser(exp.ILike), 1191 TokenType.IN: lambda self, this: self._parse_in(this), 1192 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 1193 TokenType.IS: lambda self, this: self._parse_is(this), 1194 TokenType.LIKE: binary_range_parser(exp.Like), 1195 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 1196 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 1197 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 1198 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 1199 TokenType.FOR: lambda 
self, this: self._parse_comprehension(this), 1200 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 1201 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 1202 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 1203 TokenType.ADJACENT: binary_range_parser(exp.Adjacent), 1204 TokenType.OPERATOR: lambda self, this: self._parse_operator(this), 1205 TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft), 1206 TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight), 1207 } 1208 1209 PIPE_SYNTAX_TRANSFORM_PARSERS: t.ClassVar = { 1210 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 1211 "AS": lambda self, query: self._build_pipe_cte( 1212 query, [exp.Star()], self._parse_table_alias() 1213 ), 1214 "DISTINCT": lambda self, query: self._advance() or query.distinct(copy=False), 1215 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 1216 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 1217 "ORDER BY": lambda self, query: query.order_by( 1218 self._parse_order(), append=False, copy=False 1219 ), 1220 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 1221 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 1222 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 1223 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 1224 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 1225 } 1226 1227 PROPERTY_PARSERS: t.ClassVar[dict[str, t.Callable]] = { 1228 "ALLOWED_VALUES": lambda self: self.expression( 1229 exp.AllowedValuesProperty(expressions=self._parse_csv(self._parse_primary)) 1230 ), 1231 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 1232 "AUTO": lambda self: self._parse_auto_property(), 1233 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 1234 "BACKUP": lambda self: 
self.expression( 1235 exp.BackupProperty(this=self._parse_var(any_token=True)) 1236 ), 1237 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 1238 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 1239 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 1240 "CHECKSUM": lambda self: self._parse_checksum(), 1241 "CLUSTER BY": lambda self: self._parse_cluster(), 1242 "CLUSTERED": lambda self: self._parse_clustered_by(), 1243 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 1244 exp.CollateProperty, **kwargs 1245 ), 1246 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 1247 "CONTAINS": lambda self: self._parse_contains_property(), 1248 "COPY": lambda self: self._parse_copy_property(), 1249 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 1250 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 1251 "DEFINER": lambda self: self._parse_definer(), 1252 "DETERMINISTIC": lambda self: self.expression( 1253 exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE")) 1254 ), 1255 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 1256 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 1257 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty()), 1258 "DISTKEY": lambda self: self._parse_distkey(), 1259 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1260 "EMPTY": lambda self: self.expression(exp.EmptyProperty()), 1261 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1262 "ENVIRONMENT": lambda self: self.expression( 1263 exp.EnviromentProperty(expressions=self._parse_wrapped_csv(self._parse_assignment)) 1264 ), 1265 "HANDLER": lambda self: self._parse_property_assignment(exp.HandlerProperty), 1266 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1267 "EXTERNAL": lambda self: 
self.expression(exp.ExternalProperty()), 1268 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1269 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1270 "FREESPACE": lambda self: self._parse_freespace(), 1271 "GLOBAL": lambda self: self.expression(exp.GlobalProperty()), 1272 "HEAP": lambda self: self.expression(exp.HeapProperty()), 1273 "ICEBERG": lambda self: self.expression(exp.IcebergProperty()), 1274 "IMMUTABLE": lambda self: self.expression( 1275 exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE")) 1276 ), 1277 "INHERITS": lambda self: self.expression( 1278 exp.InheritsProperty(expressions=self._parse_wrapped_csv(self._parse_table)) 1279 ), 1280 "INPUT": lambda self: self.expression(exp.InputModelProperty(this=self._parse_schema())), 1281 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1282 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1283 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1284 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1285 "LIKE": lambda self: self._parse_create_like(), 1286 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1287 "LOCK": lambda self: self._parse_locking(), 1288 "LOCKING": lambda self: self._parse_locking(), 1289 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1290 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty()), 1291 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1292 "MODIFIES": lambda self: self._parse_modifies_property(), 1293 "MULTISET": lambda self: self.expression(exp.SetProperty(multi=True)), 1294 "NO": lambda self: self._parse_no_property(), 1295 "ON": lambda self: self._parse_on_property(), 1296 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1297 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty(this=self._parse_schema())), 1298 "PARTITION": 
lambda self: self._parse_partitioned_of(), 1299 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1300 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1301 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1302 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1303 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1304 "READS": lambda self: self._parse_reads_property(), 1305 "REMOTE": lambda self: self._parse_remote_with_connection(), 1306 "RETURNS": lambda self: self._parse_returns(), 1307 "STRICT": lambda self: self.expression(exp.StrictProperty()), 1308 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty()), 1309 "ROW": lambda self: self._parse_row(), 1310 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1311 "SAMPLE": lambda self: self.expression( 1312 exp.SampleProperty(this=self._match_text_seq("BY") and self._parse_bitwise()) 1313 ), 1314 "SECURE": lambda self: self.expression(exp.SecureProperty()), 1315 "SECURITY": lambda self: self._parse_sql_security(), 1316 "SQL SECURITY": lambda self: self._parse_sql_security(), 1317 "SET": lambda self: self.expression(exp.SetProperty(multi=False)), 1318 "SETTINGS": lambda self: self._parse_settings_property(), 1319 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1320 "SORTKEY": lambda self: self._parse_sortkey(), 1321 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1322 "STABLE": lambda self: self.expression( 1323 exp.StabilityProperty(this=exp.Literal.string("STABLE")) 1324 ), 1325 "STORED": lambda self: self._parse_stored(), 1326 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1327 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1328 "TEMP": lambda self: self.expression(exp.TemporaryProperty()), 1329 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty()), 1330 "TO": lambda self: self._parse_to_table(), 
1331 "TRANSIENT": lambda self: self.expression(exp.TransientProperty()), 1332 "TRANSFORM": lambda self: self.expression( 1333 exp.TransformModelProperty(expressions=self._parse_wrapped_csv(self._parse_expression)) 1334 ), 1335 "TTL": lambda self: self._parse_ttl(), 1336 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1337 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty()), 1338 "VOLATILE": lambda self: self._parse_volatile_property(), 1339 "WITH": lambda self: self._parse_with_property(), 1340 } 1341 1342 CONSTRAINT_PARSERS: t.ClassVar = { 1343 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1344 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1345 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint(not_=False)), 1346 "CHARACTER SET": lambda self: self.expression( 1347 exp.CharacterSetColumnConstraint(this=self._parse_var_or_string()) 1348 ), 1349 "CHECK": lambda self: self._parse_check_constraint(), 1350 "COLLATE": lambda self: self.expression( 1351 exp.CollateColumnConstraint(this=self._parse_identifier() or self._parse_column()) 1352 ), 1353 "COMMENT": lambda self: self.expression( 1354 exp.CommentColumnConstraint(this=self._parse_string()) 1355 ), 1356 "COMPRESS": lambda self: self._parse_compress(), 1357 "CLUSTERED": lambda self: self.expression( 1358 exp.ClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered)) 1359 ), 1360 "NONCLUSTERED": lambda self: self.expression( 1361 exp.NonClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered)) 1362 ), 1363 "DEFAULT": lambda self: self.expression( 1364 exp.DefaultColumnConstraint(this=self._parse_bitwise()) 1365 ), 1366 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint(this=self._parse_var())), 1367 "EPHEMERAL": lambda self: self.expression( 1368 exp.EphemeralColumnConstraint(this=self._parse_bitwise()) 1369 ), 1370 "EXCLUDE": lambda self: self.expression( 1371 
exp.ExcludeColumnConstraint(this=self._parse_index_params()) 1372 ), 1373 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1374 "FORMAT": lambda self: self.expression( 1375 exp.DateFormatColumnConstraint(this=self._parse_var_or_string()) 1376 ), 1377 "GENERATED": lambda self: self._parse_generated_as_identity(), 1378 "IDENTITY": lambda self: self._parse_auto_increment(), 1379 "INLINE": lambda self: self._parse_inline(), 1380 "LIKE": lambda self: self._parse_create_like(), 1381 "NOT": lambda self: self._parse_not_constraint(), 1382 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint(allow_null=True)), 1383 "ON": lambda self: ( 1384 ( 1385 self._match(TokenType.UPDATE) 1386 and self.expression(exp.OnUpdateColumnConstraint(this=self._parse_function())) 1387 ) 1388 or self.expression(exp.OnProperty(this=self._parse_id_var())) 1389 ), 1390 "PATH": lambda self: self.expression(exp.PathColumnConstraint(this=self._parse_string())), 1391 "PERIOD": lambda self: self._parse_period_for_system_time(), 1392 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1393 "REFERENCES": lambda self: self._parse_references(match=False), 1394 "TITLE": lambda self: self.expression( 1395 exp.TitleColumnConstraint(this=self._parse_var_or_string()) 1396 ), 1397 "TTL": lambda self: self.expression(exp.MergeTreeTTL(expressions=[self._parse_bitwise()])), 1398 "UNIQUE": lambda self: self._parse_unique(), 1399 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint()), 1400 "WITH": lambda self: self.expression( 1401 exp.Properties(expressions=self._parse_wrapped_properties()) 1402 ), 1403 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1404 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1405 } 1406 1407 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expr | None: 1408 if not self._match(TokenType.L_PAREN, advance=False): 1409 # Partitioning by bucket or truncate follows the syntax: 1410 # PARTITION BY 
(BUCKET(..) | TRUNCATE(..)) 1411 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1412 self._retreat(self._index - 1) 1413 return None 1414 1415 klass = ( 1416 exp.PartitionedByBucket 1417 if self._prev.text.upper() == "BUCKET" 1418 else exp.PartitionByTruncate 1419 ) 1420 1421 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1422 this, expression = seq_get(args, 0), seq_get(args, 1) 1423 1424 if isinstance(this, exp.Literal): 1425 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1426 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1427 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1428 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1429 # 1430 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1431 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1432 this, expression = expression, this 1433 1434 return self.expression(klass(this=this, expression=expression)) 1435 1436 ALTER_PARSERS: t.ClassVar = { 1437 "ADD": lambda self: self._parse_alter_table_add(), 1438 "AS": lambda self: self._parse_select(), 1439 "ALTER": lambda self: self._parse_alter_table_alter(), 1440 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1441 "DELETE": lambda self: self.expression(exp.Delete(where=self._parse_where())), 1442 "DROP": lambda self: self._parse_alter_table_drop(), 1443 "RENAME": lambda self: self._parse_alter_table_rename(), 1444 "SET": lambda self: self._parse_alter_table_set(), 1445 "SWAP": lambda self: self.expression( 1446 exp.SwapTable(this=self._match(TokenType.WITH) and self._parse_table(schema=True)) 1447 ), 1448 } 1449 1450 ALTER_ALTER_PARSERS: 
t.ClassVar = { 1451 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1452 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1453 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1454 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1455 } 1456 1457 SCHEMA_UNNAMED_CONSTRAINTS: t.ClassVar = { 1458 "CHECK", 1459 "EXCLUDE", 1460 "FOREIGN KEY", 1461 "LIKE", 1462 "PERIOD", 1463 "PRIMARY KEY", 1464 "UNIQUE", 1465 "BUCKET", 1466 "TRUNCATE", 1467 } 1468 1469 NO_PAREN_FUNCTION_PARSERS: t.ClassVar = { 1470 "ANY": lambda self: self.expression(exp.Any(this=self._parse_bitwise())), 1471 "CASE": lambda self: self._parse_case(), 1472 "CONNECT_BY_ROOT": lambda self: self.expression( 1473 exp.ConnectByRoot(this=self._parse_column()) 1474 ), 1475 "IF": lambda self: self._parse_if(), 1476 } 1477 1478 INVALID_FUNC_NAME_TOKENS: t.ClassVar = { 1479 TokenType.IDENTIFIER, 1480 TokenType.STRING, 1481 } 1482 1483 FUNCTIONS_WITH_ALIASED_ARGS: t.ClassVar = {"STRUCT"} 1484 1485 KEY_VALUE_DEFINITIONS: t.ClassVar = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1486 1487 FUNCTION_PARSERS: t.ClassVar[dict[str, t.Callable]] = { 1488 **{ 1489 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1490 }, 1491 **{ 1492 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1493 }, 1494 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1495 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1496 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1497 "CHAR": lambda self: self._parse_char(), 1498 "CHR": lambda self: self._parse_char(), 1499 "DECODE": lambda self: self._parse_decode(), 1500 "EXTRACT": lambda self: self._parse_extract(), 1501 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1502 "GAP_FILL": lambda self: self._parse_gap_fill(), 1503 "INITCAP": lambda self: self._parse_initcap(), 1504 "JSON_OBJECT": lambda self: self._parse_json_object(), 1505 
"JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1506 "JSON_TABLE": lambda self: self._parse_json_table(), 1507 "MATCH": lambda self: self._parse_match_against(), 1508 "NORMALIZE": lambda self: self._parse_normalize(), 1509 "OPENJSON": lambda self: self._parse_open_json(), 1510 "OVERLAY": lambda self: self._parse_overlay(), 1511 "POSITION": lambda self: self._parse_position(), 1512 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1513 "STRING_AGG": lambda self: self._parse_string_agg(), 1514 "SUBSTRING": lambda self: self._parse_substring(), 1515 "TRIM": lambda self: self._parse_trim(), 1516 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1517 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1518 "XMLELEMENT": lambda self: self._parse_xml_element(), 1519 "XMLTABLE": lambda self: self._parse_xml_table(), 1520 } 1521 1522 QUERY_MODIFIER_PARSERS: t.ClassVar = { 1523 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1524 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1525 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1526 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1527 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1528 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1529 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1530 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1531 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1532 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1533 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1534 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1535 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1536 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1537 TokenType.USING: lambda self: 
("sample", self._parse_table_sample(as_modifier=True)), 1538 TokenType.CLUSTER_BY: lambda self: ( 1539 "cluster", 1540 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1541 ), 1542 TokenType.DISTRIBUTE_BY: lambda self: ( 1543 "distribute", 1544 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1545 ), 1546 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1547 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1548 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1549 } 1550 QUERY_MODIFIER_TOKENS: t.ClassVar = set(QUERY_MODIFIER_PARSERS) 1551 1552 SET_PARSERS: t.ClassVar = { 1553 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1554 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1555 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1556 "TRANSACTION": lambda self: self._parse_set_transaction(), 1557 } 1558 1559 SHOW_PARSERS: t.ClassVar[dict[str, t.Callable]] = {} 1560 1561 TYPE_LITERAL_PARSERS: t.ClassVar = { 1562 exp.DType.JSON: lambda self, this, _: self.expression(exp.ParseJSON(this=this)), 1563 } 1564 1565 TYPE_CONVERTERS: t.ClassVar[dict[exp.DType, t.Callable[[exp.DataType], exp.DataType]]] = {} 1566 1567 DDL_SELECT_TOKENS: t.ClassVar = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1568 1569 PRE_VOLATILE_TOKENS: t.ClassVar = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1570 1571 TRANSACTION_KIND: t.ClassVar = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1572 TRANSACTION_CHARACTERISTICS: t.ClassVar[OPTIONS_TYPE] = { 1573 "ISOLATION": ( 1574 ("LEVEL", "REPEATABLE", "READ"), 1575 ("LEVEL", "READ", "COMMITTED"), 1576 ("LEVEL", "READ", "UNCOMITTED"), 1577 ("LEVEL", "SERIALIZABLE"), 1578 ), 1579 "READ": ("WRITE", "ONLY"), 1580 } 1581 1582 CONFLICT_ACTIONS: t.ClassVar[OPTIONS_TYPE] = { 1583 **dict.fromkeys(("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()), 1584 "DO": 
("NOTHING", "UPDATE"), 1585 } 1586 1587 TRIGGER_TIMING: t.ClassVar[OPTIONS_TYPE] = { 1588 "INSTEAD": (("OF",),), 1589 "BEFORE": tuple(), 1590 "AFTER": tuple(), 1591 } 1592 1593 TRIGGER_DEFERRABLE: t.ClassVar[OPTIONS_TYPE] = { 1594 "NOT": (("DEFERRABLE",),), 1595 "DEFERRABLE": tuple(), 1596 } 1597 1598 CREATE_SEQUENCE: t.ClassVar[OPTIONS_TYPE] = { 1599 "SCALE": ("EXTEND", "NOEXTEND"), 1600 "SHARD": ("EXTEND", "NOEXTEND"), 1601 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1602 **dict.fromkeys( 1603 ( 1604 "SESSION", 1605 "GLOBAL", 1606 "KEEP", 1607 "NOKEEP", 1608 "ORDER", 1609 "NOORDER", 1610 "NOCACHE", 1611 "CYCLE", 1612 "NOCYCLE", 1613 "NOMINVALUE", 1614 "NOMAXVALUE", 1615 "NOSCALE", 1616 "NOSHARD", 1617 ), 1618 tuple(), 1619 ), 1620 } 1621 1622 ISOLATED_LOADING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {"FOR": ("ALL", "INSERT", "NONE")} 1623 1624 USABLES: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys( 1625 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1626 ) 1627 1628 CAST_ACTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1629 1630 SCHEMA_BINDING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = { 1631 "TYPE": ("EVOLUTION",), 1632 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1633 } 1634 1635 PROCEDURE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {} 1636 1637 EXECUTE_AS_OPTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys( 1638 ("CALLER", "SELF", "OWNER"), tuple() 1639 ) 1640 1641 KEY_CONSTRAINT_OPTIONS: t.ClassVar[OPTIONS_TYPE] = { 1642 "NOT": ("ENFORCED",), 1643 "MATCH": ( 1644 "FULL", 1645 "PARTIAL", 1646 "SIMPLE", 1647 ), 1648 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1649 "USING": ( 1650 "BTREE", 1651 "HASH", 1652 ), 1653 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1654 } 1655 1656 WINDOW_EXCLUDE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = { 1657 "NO": ("OTHERS",), 1658 "CURRENT": ("ROW",), 1659 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1660 } 1661 1662 INSERT_ALTERNATIVES: t.ClassVar = {"ABORT", "FAIL", 
"IGNORE", "REPLACE", "ROLLBACK"} 1663 1664 CLONE_KEYWORDS: t.ClassVar = {"CLONE", "COPY"} 1665 HISTORICAL_DATA_PREFIX: t.ClassVar = {"AT", "BEFORE", "END"} 1666 HISTORICAL_DATA_KIND: t.ClassVar = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1667 1668 OPCLASS_FOLLOW_KEYWORDS: t.ClassVar = {"ASC", "DESC", "NULLS", "WITH"} 1669 1670 OPTYPE_FOLLOW_TOKENS: t.ClassVar = {TokenType.COMMA, TokenType.R_PAREN} 1671 1672 TABLE_INDEX_HINT_TOKENS: t.ClassVar = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1673 1674 VIEW_ATTRIBUTES: t.ClassVar = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1675 1676 WINDOW_ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS} 1677 WINDOW_BEFORE_PAREN_TOKENS: t.ClassVar = {TokenType.OVER} 1678 WINDOW_SIDES: t.ClassVar = {"FOLLOWING", "PRECEDING"} 1679 1680 JSON_KEY_VALUE_SEPARATOR_TOKENS: t.ClassVar = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1681 1682 FETCH_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1683 1684 ADD_CONSTRAINT_TOKENS: t.ClassVar = { 1685 TokenType.CONSTRAINT, 1686 TokenType.FOREIGN_KEY, 1687 TokenType.INDEX, 1688 TokenType.KEY, 1689 TokenType.PRIMARY_KEY, 1690 TokenType.UNIQUE, 1691 } 1692 1693 DISTINCT_TOKENS: t.ClassVar = {TokenType.DISTINCT} 1694 1695 UNNEST_OFFSET_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1696 1697 SELECT_START_TOKENS: t.ClassVar = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1698 1699 COPY_INTO_VARLEN_OPTIONS: t.ClassVar = { 1700 "FILE_FORMAT", 1701 "COPY_OPTIONS", 1702 "FORMAT_OPTIONS", 1703 "CREDENTIAL", 1704 } 1705 1706 IS_JSON_PREDICATE_KIND: t.ClassVar = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1707 1708 ODBC_DATETIME_LITERALS: t.ClassVar[dict[str, type[exp.Expr]]] = {} 1709 1710 ON_CONDITION_TOKENS: t.ClassVar = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1711 1712 PRIVILEGE_FOLLOW_TOKENS: t.ClassVar = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1713 1714 # The style options 
for the DESCRIBE statement 1715 DESCRIBE_STYLES: t.ClassVar = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1716 1717 SET_ASSIGNMENT_DELIMITERS: t.ClassVar = {"=", ":=", "TO"} 1718 1719 # The style options for the ANALYZE statement 1720 ANALYZE_STYLES: t.ClassVar = { 1721 "BUFFER_USAGE_LIMIT", 1722 "FULL", 1723 "LOCAL", 1724 "NO_WRITE_TO_BINLOG", 1725 "SAMPLE", 1726 "SKIP_LOCKED", 1727 "VERBOSE", 1728 } 1729 1730 ANALYZE_EXPRESSION_PARSERS: t.ClassVar = { 1731 "ALL": lambda self: self._parse_analyze_columns(), 1732 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1733 "DELETE": lambda self: self._parse_analyze_delete(), 1734 "DROP": lambda self: self._parse_analyze_histogram(), 1735 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1736 "LIST": lambda self: self._parse_analyze_list(), 1737 "PREDICATE": lambda self: self._parse_analyze_columns(), 1738 "UPDATE": lambda self: self._parse_analyze_histogram(), 1739 "VALIDATE": lambda self: self._parse_analyze_validate(), 1740 } 1741 1742 PARTITION_KEYWORDS: t.ClassVar = {"PARTITION", "SUBPARTITION"} 1743 1744 AMBIGUOUS_ALIAS_TOKENS: t.ClassVar = (TokenType.LIMIT, TokenType.OFFSET) 1745 1746 OPERATION_MODIFIERS: t.ClassVar[set[str]] = set() 1747 1748 RECURSIVE_CTE_SEARCH_KIND: t.ClassVar = {"BREADTH", "DEPTH", "CYCLE"} 1749 1750 SECURITY_PROPERTY_KEYWORDS: t.ClassVar = {"DEFINER", "INVOKER", "NONE"} 1751 1752 MODIFIABLES: t.ClassVar = (exp.Query, exp.Table, exp.TableFromRows, exp.Values) 1753 1754 STRICT_CAST: t.ClassVar = True 1755 1756 PREFIXED_PIVOT_COLUMNS: t.ClassVar = False 1757 IDENTIFY_PIVOT_STRINGS: t.ClassVar = False 1758 1759 LOG_DEFAULTS_TO_LN: t.ClassVar = False 1760 1761 # Whether the table sample clause expects CSV syntax 1762 TABLESAMPLE_CSV: t.ClassVar = False 1763 1764 # The default method used for table sampling 1765 DEFAULT_SAMPLING_METHOD: t.ClassVar[str | None] = None 1766 1767 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1768 SET_REQUIRES_ASSIGNMENT_DELIMITER: t.ClassVar = True 1769 1770 # Whether the TRIM function expects the characters to trim as its first argument 1771 TRIM_PATTERN_FIRST: t.ClassVar = False 1772 1773 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1774 STRING_ALIASES: t.ClassVar = False 1775 1776 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1777 MODIFIERS_ATTACHED_TO_SET_OP: t.ClassVar = True 1778 SET_OP_MODIFIERS: t.ClassVar = {"order", "limit", "offset"} 1779 1780 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1781 NO_PAREN_IF_COMMANDS: t.ClassVar = True 1782 1783 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1784 JSON_ARROWS_REQUIRE_JSON_TYPE: t.ClassVar = False 1785 1786 # Whether the `:` operator is used to extract a value from a VARIANT column 1787 COLON_IS_VARIANT_EXTRACT: t.ClassVar = False 1788 1789 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1790 # If this is True and '(' is not found, the keyword will be treated as an identifier 1791 VALUES_FOLLOWED_BY_PAREN: t.ClassVar = True 1792 1793 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1794 SUPPORTS_IMPLICIT_UNNEST: t.ClassVar = False 1795 1796 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1797 INTERVAL_SPANS: t.ClassVar = True 1798 1799 # Whether a PARTITION clause can follow a table reference 1800 SUPPORTS_PARTITION_SELECTION: t.ClassVar = False 1801 1802 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1803 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT: t.ClassVar = True 1804 1805 # Whether the 'AS' keyword is optional in the CTE definition syntax 1806 OPTIONAL_ALIAS_TOKEN_CTE: t.ClassVar = True 1807 1808 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1809 ALTER_RENAME_REQUIRES_COLUMN: t.ClassVar = True 1810 1811 # Whether Alter statements are allowed to contain Partition specifications 1812 ALTER_TABLE_PARTITIONS: t.ClassVar = False 1813 1814 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1815 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1816 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1817 # as BigQuery, where all joins have the same precedence. 1818 JOINS_HAVE_EQUAL_PRECEDENCE: t.ClassVar = False 1819 1820 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1821 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR: t.ClassVar = False 1822 1823 # Whether map literals support arbitrary expressions as keys. 1824 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1825 # When False, keys are typically restricted to identifiers. 
MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: t.ClassVar = False

# Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this
# is true for Snowflake but not for BigQuery which can also process strings
JSON_EXTRACT_REQUIRES_JSON_EXPRESSION: t.ClassVar = False

# Dialects like Databricks support JOINS without join criteria
# Adding an ON TRUE, makes transpilation semantically correct for other dialects
ADD_JOIN_ON_TRUE: t.ClassVar = False

# Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
# can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT: t.ClassVar = False

# Tries keyed by the space-separated words of each SHOW / SET parser key
SHOW_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SHOW_PARSERS)
SET_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SET_PARSERS)

def __init__(
    self,
    error_level: ErrorLevel | None = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    max_nodes: int = -1,
    dialect: DialectType = None,
):
    """
    Store the parser configuration and initialise an empty parsing state.

    Args:
        error_level: The error level to use; a falsy value selects ErrorLevel.IMMEDIATE.
        error_message_context: Stored as-is on the instance.
        max_errors: Stored as-is on the instance.
        max_nodes: Stored as-is on the instance.
        dialect: Passed through `_resolve_dialect` and stored on the instance.
    """
    # Configuration
    self.error_level: ErrorLevel = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context: int = error_message_context
    self.max_errors: int = max_errors
    self.max_nodes: int = max_nodes
    self.dialect: t.Any = _resolve_dialect(dialect)

    # Per-parse state (the same fields that `reset` clears)
    self.sql: str = ""
    self.errors: list[ParseError] = []
    self._tokens: list[Token] = []
    self._tokens_size: i64 = 0
    self._index: i64 = 0
    self._curr: Token = SENTINEL_NONE
    self._next: Token = SENTINEL_NONE
    self._prev: Token = SENTINEL_NONE
    self._prev_comments: list[str] = []
    self._pipe_cte_counter: int = 0
    self._chunks: list[list[Token]] = []
    self._chunk_index: i64 = 0
    self._node_count: int = 0

def reset(self) -> None:
    """Clear all per-parse state, leaving the configuration untouched."""
    self.sql = ""

    # Each container gets its own fresh list
    self.errors = []
    self._tokens = []
    self._prev_comments = []
    self._chunks = []

    self._tokens_size = self._index = self._chunk_index = 0
    self._pipe_cte_counter = self._node_count = 0
    self._curr = self._next = self._prev = SENTINEL_NONE

def _advance(self, times: i64 = 1) -> None:
    """Move the cursor by `times` tokens and refresh the current, next and previous tokens."""
    position = self._index + times
    self._index = position

    stream = self._tokens
    limit = self._tokens_size
    lookahead = position + 1

    self._curr = stream[position] if position < limit else SENTINEL_NONE
    self._next = stream[lookahead] if lookahead < limit else SENTINEL_NONE

    if position <= 0:
        # Nothing precedes the first token
        self._prev = SENTINEL_NONE
        self._prev_comments = []
        return

    before = stream[position - 1]
    self._prev = before
    self._prev_comments = before.comments

def _advance_chunk(self) -> None:
    """Load the chunk at `_chunk_index` as the token list and position the cursor on its first token."""
    self._index = -1
    chunk = self._chunks[self._chunk_index]
    self._tokens = chunk
    self._tokens_size = i64(len(chunk))
    self._chunk_index += 1
    self._advance()

def _retreat(self, index: i64) -> None:
    """Move the cursor to the absolute position `index` (no-op when already there)."""
    if index == self._index:
        return
    self._advance(index - self._index)

def _add_comments(self, expression: exp.Expr | None) -> None:
    """Attach the pending previous-token comments to `expression` and clear them."""
    if not (expression and self._prev_comments):
        return
    expression.add_comments(self._prev_comments)
    self._prev_comments = []

def _match(
    self, token_type: TokenType, advance: bool = True, expression: exp.Expr | None = None
) -> bool:
    """Return whether the current token has type `token_type`, optionally consuming it.

    On a match, pending comments are attached to `expression` (if given).
    """
    if self._curr.token_type != token_type:
        return False

    if advance:
        self._advance()
    self._add_comments(expression)
    return True

def _match_set(self, types: t.Collection[TokenType], advance: bool = True) -> bool:
    """Return whether the current token's type is in `types`, optionally consuming it."""
    if self._curr.token_type not in types:
        return False

    if advance:
        self._advance()
    return True

def _match_pair(
    self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
) -> bool:
    """Return whether the current and next tokens have the given types, optionally consuming both."""
    if self._curr.token_type != token_type_a or self._next.token_type != token_type_b:
        return False

    if advance:
        self._advance(2)
    return True

1942 1943 def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool: 1944 if self._curr.token_type != TokenType.STRING and self._curr.text.upper() in texts: 1945 if advance: 1946 self._advance() 1947 return True 1948 return False 1949 1950 def _match_text_seq(self, *texts: str, advance: bool = True) -> bool: 1951 index = self._index 1952 string_type = TokenType.STRING 1953 for text in texts: 1954 if self._curr.token_type != string_type and self._curr.text.upper() == text: 1955 self._advance() 1956 else: 1957 self._retreat(index) 1958 return False 1959 1960 if not advance: 1961 self._retreat(index) 1962 1963 return True 1964 1965 def _is_connected(self) -> bool: 1966 prev = self._prev 1967 curr = self._curr 1968 return bool(prev and curr and prev.end + 1 == curr.start) 1969 1970 def _find_sql(self, start: Token, end: Token) -> str: 1971 return self.sql[start.start : end.end + 1] 1972 1973 def raise_error(self, message: str, token: Token = SENTINEL_NONE) -> None: 1974 token = token or self._curr or self._prev or Token.string("") 1975 formatted_sql, start_context, highlight, end_context = highlight_sql( 1976 sql=self.sql, 1977 positions=[(token.start, token.end)], 1978 context_length=self.error_message_context, 1979 ) 1980 formatted_message = f"{message}. 
Line {token.line}, Col: {token.col}.\n {formatted_sql}" 1981 1982 error = ParseError.new( 1983 formatted_message, 1984 description=message, 1985 line=token.line, 1986 col=token.col, 1987 start_context=start_context, 1988 highlight=highlight, 1989 end_context=end_context, 1990 ) 1991 1992 if self.error_level == ErrorLevel.IMMEDIATE: 1993 raise error 1994 1995 self.errors.append(error) 1996 1997 def validate_expression(self, expression: E, args: list | None = None) -> E: 1998 if self.max_nodes > -1: 1999 self._node_count += 1 2000 if self._node_count > self.max_nodes: 2001 self.raise_error(f"Maximum number of AST nodes ({self.max_nodes}) exceeded") 2002 if self.error_level != ErrorLevel.IGNORE: 2003 for error_message in expression.error_messages(args): 2004 self.raise_error(error_message) 2005 return expression 2006 2007 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> T | None: 2008 index = self._index 2009 error_level = self.error_level 2010 this: T | None = None 2011 2012 self.error_level = ErrorLevel.IMMEDIATE 2013 try: 2014 this = parse_method() 2015 except ParseError: 2016 this = None 2017 finally: 2018 if not this or retreat: 2019 self._retreat(index) 2020 self.error_level = error_level 2021 2022 return this 2023 2024 def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]: 2025 """ 2026 Parses a list of tokens and returns a list of syntax trees, one tree 2027 per parsed SQL statement. 2028 2029 Args: 2030 raw_tokens: The list of tokens. 2031 sql: The original SQL string. 2032 2033 Returns: 2034 The list of the produced syntax trees. 2035 """ 2036 return self._parse( 2037 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 2038 ) 2039 2040 def parse_into( 2041 self, 2042 expression_types: exp.IntoType, 2043 raw_tokens: list[Token], 2044 sql: str | None = None, 2045 ) -> list[exp.Expr | None]: 2046 """ 2047 Parses a list of tokens into a given Expr type. 
If a collection of Expr 2048 types is given instead, this method will try to parse the token list into each one 2049 of them, stopping at the first for which the parsing succeeds. 2050 2051 Args: 2052 expression_types: The expression type(s) to try and parse the token list into. 2053 raw_tokens: The list of tokens. 2054 sql: The original SQL string, used to produce helpful debug messages. 2055 2056 Returns: 2057 The target Expr. 2058 """ 2059 errors = [] 2060 for expression_type in ensure_list(expression_types): 2061 parser = self.EXPRESSION_PARSERS.get(t.cast(type[exp.Expr], expression_type)) 2062 if not parser: 2063 raise TypeError(f"No parser registered for {expression_type}") 2064 2065 try: 2066 return self._parse(parser, raw_tokens, sql) 2067 except ParseError as e: 2068 e.errors[0]["into_expression"] = expression_type 2069 errors.append(e) 2070 2071 raise ParseError( 2072 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 2073 errors=merge_errors(errors), 2074 ) from errors[-1] 2075 2076 def check_errors(self) -> None: 2077 """Logs or raises any found errors, depending on the chosen error level setting.""" 2078 if self.error_level == ErrorLevel.WARN: 2079 for error in self.errors: 2080 logger.error(str(error)) 2081 elif self.error_level == ErrorLevel.RAISE and self.errors: 2082 raise ParseError( 2083 concat_messages(self.errors, self.max_errors), 2084 errors=merge_errors(self.errors), 2085 ) 2086 2087 def expression( 2088 self, 2089 instance: E, 2090 token: Token | None = None, 2091 comments: list[str] | None = None, 2092 ) -> E: 2093 if token: 2094 instance.update_positions(token) 2095 instance.add_comments(comments) if comments else self._add_comments(instance) 2096 if not instance.is_primitive: 2097 instance = self.validate_expression(instance) 2098 return instance 2099 2100 def _parse_batch_statements( 2101 self, 2102 parse_method: t.Callable[[Parser], exp.Expr | None], 2103 sep_first_statement: bool = True, 2104 ) -> list[exp.Expr | None]: 
2105 expressions = [] 2106 2107 # Chunkification binds if/while statements with the first statement of the body 2108 if sep_first_statement: 2109 self._match(TokenType.BEGIN) 2110 expressions.append(parse_method(self)) 2111 2112 chunks_length = len(self._chunks) 2113 while self._chunk_index < chunks_length: 2114 self._advance_chunk() 2115 2116 if self._match(TokenType.ELSE, advance=False): 2117 return expressions 2118 2119 if expressions and not self._next and self._match(TokenType.END): 2120 expressions.append(exp.EndStatement()) 2121 continue 2122 2123 expressions.append(parse_method(self)) 2124 2125 if self._index < self._tokens_size: 2126 self.raise_error("Invalid expression / Unexpected token") 2127 2128 self.check_errors() 2129 2130 return expressions 2131 2132 def _parse( 2133 self, 2134 parse_method: t.Callable[[Parser], exp.Expr | None], 2135 raw_tokens: list[Token], 2136 sql: str | None = None, 2137 ) -> list[exp.Expr | None]: 2138 self.reset() 2139 self.sql = sql or "" 2140 2141 total = len(raw_tokens) 2142 chunks: list[list[Token]] = [[]] 2143 2144 for i, token in enumerate(raw_tokens): 2145 if token.token_type == TokenType.SEMICOLON: 2146 if token.comments: 2147 chunks.append([token]) 2148 2149 if i < total - 1: 2150 chunks.append([]) 2151 else: 2152 chunks[-1].append(token) 2153 2154 self._chunks = chunks 2155 2156 return self._parse_batch_statements(parse_method=parse_method, sep_first_statement=False) 2157 2158 def _warn_unsupported(self) -> None: 2159 if self._tokens_size <= 1: 2160 return 2161 2162 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 2163 # interested in emitting a warning for the one being currently processed. 2164 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 2165 2166 logger.warning( 2167 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 
2168 ) 2169 2170 def _parse_command(self) -> exp.Command: 2171 self._warn_unsupported() 2172 comments = self._prev_comments 2173 return self.expression( 2174 exp.Command(this=self._prev.text.upper(), expression=self._parse_string()), 2175 comments=comments, 2176 ) 2177 2178 def _parse_comment(self, allow_exists: bool = True) -> exp.Expr: 2179 start = self._prev 2180 exists = self._parse_exists() if allow_exists else None 2181 2182 self._match(TokenType.ON) 2183 2184 materialized = self._match_text_seq("MATERIALIZED") 2185 kind = self._match_set(self.CREATABLES) and self._prev 2186 if not kind: 2187 return self._parse_as_command(start) 2188 2189 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2190 this = self._parse_user_defined_function(kind=kind.token_type) 2191 elif kind.token_type == TokenType.TABLE: 2192 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 2193 elif kind.token_type == TokenType.COLUMN: 2194 this = self._parse_column() 2195 else: 2196 this = self._parse_id_var() 2197 2198 self._match(TokenType.IS) 2199 2200 return self.expression( 2201 exp.Comment( 2202 this=this, 2203 kind=kind.text, 2204 expression=self._parse_string(), 2205 exists=exists, 2206 materialized=materialized, 2207 ) 2208 ) 2209 2210 def _parse_to_table( 2211 self, 2212 ) -> exp.ToTableProperty: 2213 table = self._parse_table_parts(schema=True) 2214 return self.expression(exp.ToTableProperty(this=table)) 2215 2216 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 2217 def _parse_ttl(self) -> exp.Expr: 2218 def _parse_ttl_action() -> exp.Expr | None: 2219 this = self._parse_bitwise() 2220 2221 if self._match_text_seq("DELETE"): 2222 return self.expression(exp.MergeTreeTTLAction(this=this, delete=True)) 2223 if self._match_text_seq("RECOMPRESS"): 2224 return self.expression( 2225 exp.MergeTreeTTLAction(this=this, recompress=self._parse_bitwise()) 2226 ) 2227 if self._match_text_seq("TO", "DISK"): 
2228 return self.expression( 2229 exp.MergeTreeTTLAction(this=this, to_disk=self._parse_string()) 2230 ) 2231 if self._match_text_seq("TO", "VOLUME"): 2232 return self.expression( 2233 exp.MergeTreeTTLAction(this=this, to_volume=self._parse_string()) 2234 ) 2235 2236 return this 2237 2238 expressions = self._parse_csv(_parse_ttl_action) 2239 where = self._parse_where() 2240 group = self._parse_group() 2241 2242 aggregates = None 2243 if group and self._match(TokenType.SET): 2244 aggregates = self._parse_csv(self._parse_set_item) 2245 2246 return self.expression( 2247 exp.MergeTreeTTL( 2248 expressions=expressions, where=where, group=group, aggregates=aggregates 2249 ) 2250 ) 2251 2252 def _parse_condition(self) -> exp.Expr | None: 2253 return self._parse_wrapped(parse_method=self._parse_expression, optional=True) 2254 2255 def _parse_block(self) -> exp.Block: 2256 return self.expression( 2257 exp.Block( 2258 expressions=self._parse_batch_statements( 2259 parse_method=lambda self: self._parse_statement() 2260 ) 2261 ) 2262 ) 2263 2264 def _parse_whileblock(self) -> exp.WhileBlock: 2265 return self.expression( 2266 exp.WhileBlock(this=self._parse_condition(), body=self._parse_block()) 2267 ) 2268 2269 def _parse_statement(self) -> exp.Expr | None: 2270 if not self._curr: 2271 return None 2272 2273 if self._match_set(self.STATEMENT_PARSERS): 2274 comments = self._prev_comments 2275 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 2276 stmt.add_comments(comments, prepend=True) 2277 return stmt 2278 2279 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 2280 return self._parse_command() 2281 2282 if self._match_text_seq("WHILE"): 2283 return self._parse_whileblock() 2284 2285 expression = self._parse_expression() 2286 expression = self._parse_set_operations(expression) if expression else self._parse_select() 2287 2288 if isinstance(expression, exp.Subquery) and self._match(TokenType.PIPE_GT, advance=False): 2289 expression = 
self._parse_pipe_syntax_query(expression) 2290 2291 return self._parse_query_modifiers(expression) 2292 2293 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 2294 start = self._prev 2295 temporary = self._match(TokenType.TEMPORARY) 2296 materialized = self._match_text_seq("MATERIALIZED") 2297 iceberg = self._match_text_seq("ICEBERG") 2298 2299 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 2300 if not kind or (iceberg and kind and kind != "TABLE"): 2301 return self._parse_as_command(start) 2302 2303 concurrently = self._match_text_seq("CONCURRENTLY") 2304 if_exists = exists or self._parse_exists() 2305 2306 if kind == "COLUMN": 2307 this = self._parse_column() 2308 else: 2309 this = self._parse_table_parts(schema=True, is_db_reference=kind == "SCHEMA") 2310 2311 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 2312 2313 if self._match(TokenType.L_PAREN, advance=False): 2314 expressions = self._parse_wrapped_csv(self._parse_types) 2315 else: 2316 expressions = None 2317 2318 cascade_or_restrict = self._match_texts(("CASCADE", "RESTRICT")) and self._prev.text.upper() 2319 2320 return self.expression( 2321 exp.Drop( 2322 exists=if_exists, 2323 this=this, 2324 expressions=expressions, 2325 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 2326 temporary=temporary, 2327 materialized=materialized, 2328 cascade=cascade_or_restrict == "CASCADE", 2329 restrict=cascade_or_restrict == "RESTRICT", 2330 constraints=self._match_text_seq("CONSTRAINTS"), 2331 purge=self._match_text_seq("PURGE"), 2332 cluster=cluster, 2333 concurrently=concurrently, 2334 sync=self._match_text_seq("SYNC"), 2335 iceberg=iceberg, 2336 ) 2337 ) 2338 2339 def _parse_exists(self, not_: bool = False) -> bool | None: 2340 return ( 2341 self._match_text_seq("IF") 2342 and (not not_ or self._match(TokenType.NOT)) 2343 and self._match(TokenType.EXISTS) 2344 ) 2345 2346 def _parse_create(self) -> exp.Create | exp.Command: 2347 # 
Note: this can't be None because we've matched a statement parser 2348 start = self._prev 2349 2350 replace = ( 2351 start.token_type == TokenType.REPLACE 2352 or self._match_pair(TokenType.OR, TokenType.REPLACE) 2353 or self._match_pair(TokenType.OR, TokenType.ALTER) 2354 ) 2355 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 2356 2357 unique = self._match(TokenType.UNIQUE) 2358 2359 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2360 clustered = True 2361 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2362 "COLUMNSTORE" 2363 ): 2364 clustered = False 2365 else: 2366 clustered = None 2367 2368 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2369 self._advance() 2370 2371 properties = None 2372 create_token = self._match_set(self.CREATABLES) and self._prev 2373 2374 if not create_token: 2375 # exp.Properties.Location.POST_CREATE 2376 properties = self._parse_properties() 2377 create_token = self._match_set(self.CREATABLES) and self._prev 2378 2379 if not properties or not create_token: 2380 return self._parse_as_command(start) 2381 2382 create_token_type = t.cast(Token, create_token).token_type 2383 2384 concurrently = self._match_text_seq("CONCURRENTLY") 2385 exists = self._parse_exists(not_=True) 2386 this = None 2387 expression: exp.Expr | None = None 2388 indexes = None 2389 no_schema_binding = None 2390 begin = None 2391 clone = None 2392 2393 def extend_props(temp_props: exp.Properties | None) -> None: 2394 nonlocal properties 2395 if properties and temp_props: 2396 properties.expressions.extend(temp_props.expressions) 2397 elif temp_props: 2398 properties = temp_props 2399 2400 if create_token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2401 this = self._parse_user_defined_function(kind=create_token_type) 2402 2403 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2404 extend_props(self._parse_properties()) 2405 2406 expression = 
self._parse_heredoc() if self._match(TokenType.ALIAS) else None 2407 extend_props(self._parse_function_properties()) 2408 2409 if not expression: 2410 if self._match(TokenType.COMMAND): 2411 expression = self._parse_as_command(self._prev) 2412 else: 2413 begin = self._match(TokenType.BEGIN) 2414 return_ = self._match_text_seq("RETURN") 2415 2416 if self._match(TokenType.STRING, advance=False): 2417 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2418 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2419 expression = self._parse_string() 2420 extend_props(self._parse_properties()) 2421 else: 2422 expression = ( 2423 self._parse_user_defined_function_expression() 2424 if create_token_type == TokenType.FUNCTION 2425 else self._parse_block() 2426 ) 2427 2428 if return_: 2429 expression = self.expression(exp.Return(this=expression)) 2430 elif create_token_type == TokenType.INDEX: 2431 # Postgres allows anonymous indexes, eg. 
CREATE INDEX IF NOT EXISTS ON t(c) 2432 if not self._match(TokenType.ON): 2433 index = self._parse_id_var() 2434 anonymous = False 2435 else: 2436 index = None 2437 anonymous = True 2438 2439 this = self._parse_index(index=index, anonymous=anonymous) 2440 elif ( 2441 create_token_type == TokenType.CONSTRAINT and self._match(TokenType.TRIGGER) 2442 ) or create_token_type == TokenType.TRIGGER: 2443 if is_constraint := (create_token_type == TokenType.CONSTRAINT): 2444 create_token = self._prev 2445 2446 trigger_name = self._parse_id_var() 2447 if not trigger_name: 2448 return self._parse_as_command(start) 2449 2450 timing_var = self._parse_var_from_options(self.TRIGGER_TIMING, raise_unmatched=False) 2451 timing = timing_var.this if timing_var else None 2452 if not timing: 2453 return self._parse_as_command(start) 2454 2455 events = self._parse_trigger_events() 2456 if not self._match(TokenType.ON): 2457 self.raise_error("Expected ON in trigger definition") 2458 2459 table = self._parse_table_parts() 2460 referenced_table = self._parse_table_parts() if self._match(TokenType.FROM) else None 2461 deferrable, initially = self._parse_trigger_deferrable() 2462 referencing = self._parse_trigger_referencing() 2463 for_each = self._parse_trigger_for_each() 2464 when = self._match_text_seq("WHEN") and self._parse_wrapped( 2465 self._parse_disjunction, optional=True 2466 ) 2467 execute = self._parse_trigger_execute() 2468 2469 if execute is None: 2470 return self._parse_as_command(start) 2471 2472 trigger_props = self.expression( 2473 exp.TriggerProperties( 2474 table=table, 2475 timing=timing, 2476 events=events, 2477 execute=execute, 2478 constraint=is_constraint, 2479 referenced_table=referenced_table, 2480 deferrable=deferrable, 2481 initially=initially, 2482 referencing=referencing, 2483 for_each=for_each, 2484 when=when, 2485 ) 2486 ) 2487 2488 this = trigger_name 2489 extend_props(exp.Properties(expressions=[trigger_props] if trigger_props else [])) 2490 elif 
create_token_type == TokenType.TYPE: 2491 this = self._parse_table_parts(schema=True) 2492 if not this or not self._match(TokenType.ALIAS): 2493 return self._parse_as_command(start) 2494 2495 if self._match(TokenType.ENUM): 2496 expression = exp.DataType( 2497 this=exp.DType.ENUM, 2498 expressions=self._parse_wrapped_csv(self._parse_string), 2499 ) 2500 elif self._match(TokenType.L_PAREN, advance=False): 2501 expression = self._parse_schema() 2502 else: 2503 return self._parse_as_command(start) 2504 elif create_token_type in self.DB_CREATABLES: 2505 table_parts = self._parse_table_parts( 2506 schema=True, is_db_reference=create_token_type == TokenType.SCHEMA 2507 ) 2508 2509 # exp.Properties.Location.POST_NAME 2510 self._match(TokenType.COMMA) 2511 extend_props(self._parse_properties(before=True)) 2512 2513 this = self._parse_schema(this=table_parts) 2514 2515 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2516 extend_props(self._parse_properties()) 2517 2518 has_alias = self._match(TokenType.ALIAS) 2519 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2520 # exp.Properties.Location.POST_ALIAS 2521 extend_props(self._parse_properties()) 2522 2523 if create_token_type == TokenType.SEQUENCE: 2524 expression = self._parse_types() 2525 props = self._parse_properties() 2526 if props: 2527 sequence_props = exp.SequenceProperties() 2528 options = [] 2529 for prop in props: 2530 if isinstance(prop, exp.SequenceProperties): 2531 for arg, value in prop.args.items(): 2532 if arg == "options": 2533 options.extend(value) 2534 else: 2535 sequence_props.set(arg, value) 2536 prop.pop() 2537 2538 if options: 2539 sequence_props.set("options", options) 2540 2541 props.append("expressions", sequence_props) 2542 extend_props(props) 2543 else: 2544 expression = self._parse_ddl_select() 2545 2546 # Some dialects also support using a table as an alias instead of a SELECT. 2547 # Here we fallback to this as an alternative. 
2548 if not expression and has_alias: 2549 expression = self._try_parse(self._parse_table_parts) 2550 2551 if create_token_type == TokenType.TABLE: 2552 # exp.Properties.Location.POST_EXPRESSION 2553 extend_props(self._parse_properties()) 2554 2555 indexes = [] 2556 while True: 2557 index = self._parse_index() 2558 2559 # exp.Properties.Location.POST_INDEX 2560 extend_props(self._parse_properties()) 2561 if not index: 2562 break 2563 else: 2564 self._match(TokenType.COMMA) 2565 indexes.append(index) 2566 elif create_token_type == TokenType.VIEW: 2567 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2568 no_schema_binding = True 2569 elif create_token_type in (TokenType.SINK, TokenType.SOURCE): 2570 extend_props(self._parse_properties()) 2571 2572 shallow = self._match_text_seq("SHALLOW") 2573 2574 if self._match_texts(self.CLONE_KEYWORDS): 2575 copy = self._prev.text.lower() == "copy" 2576 clone = self.expression( 2577 exp.Clone(this=self._parse_table(schema=True), shallow=shallow, copy=copy) 2578 ) 2579 2580 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2581 return self._parse_as_command(start) 2582 2583 create_kind_text = create_token.text.upper() 2584 return self.expression( 2585 exp.Create( 2586 this=this, 2587 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2588 replace=replace, 2589 refresh=refresh, 2590 unique=unique, 2591 expression=expression, 2592 exists=exists, 2593 properties=properties, 2594 indexes=indexes, 2595 no_schema_binding=no_schema_binding, 2596 begin=begin, 2597 clone=clone, 2598 concurrently=concurrently, 2599 clustered=clustered, 2600 ) 2601 ) 2602 2603 def _parse_sequence_properties(self) -> exp.SequenceProperties | None: 2604 seq = exp.SequenceProperties() 2605 2606 options = [] 2607 index = self._index 2608 2609 while self._curr: 2610 self._match(TokenType.COMMA) 2611 if self._match_text_seq("INCREMENT"): 2612 self._match_text_seq("BY") 2613 
def _parse_trigger_events(self) -> list[exp.TriggerEvent]:
    """Parse one or more trigger events separated by OR.

    Each event must be one of `self.TRIGGER_EVENTS`. An UPDATE event may be
    followed by `OF <column>, ...`, in which case the columns are attached to
    that event. Events are returned in the order they were read.
    """
    events = []

    while True:
        event_type = self._match_set(self.TRIGGER_EVENTS) and self._prev.text.upper()

        if not event_type:
            self.raise_error("Expected trigger event (INSERT, UPDATE, DELETE, TRUNCATE)")
            # raise_error does not always raise (it depends on the parser's
            # error level). When it returns, stop here instead of building a
            # TriggerEvent that has no event name.
            break

        # Only UPDATE accepts a column list, introduced by OF.
        columns = (
            self._parse_csv(self._parse_column)
            if event_type == "UPDATE" and self._match_text_seq("OF")
            else None
        )

        events.append(self.expression(exp.TriggerEvent(this=event_type, columns=columns)))

        if not self._match(TokenType.OR):
            break

    return events
self._prev.text.upper() if self._match_texts(("IMMEDIATE", "DEFERRED")) else None 2672 ) 2673 2674 return deferrable, initially 2675 2676 def _parse_trigger_referencing_clause(self, keyword: str) -> exp.Expr | None: 2677 if not self._match_text_seq(keyword): 2678 return None 2679 if not self._match_text_seq("TABLE"): 2680 self.raise_error(f"Expected TABLE after {keyword} in REFERENCING clause") 2681 self._match_text_seq("AS") 2682 return self._parse_id_var() 2683 2684 def _parse_trigger_referencing(self) -> exp.TriggerReferencing | None: 2685 if not self._match_text_seq("REFERENCING"): 2686 return None 2687 2688 old_alias = None 2689 new_alias = None 2690 2691 while True: 2692 if alias := self._parse_trigger_referencing_clause("OLD"): 2693 if old_alias is not None: 2694 self.raise_error("Duplicate OLD clause in REFERENCING") 2695 old_alias = alias 2696 elif alias := self._parse_trigger_referencing_clause("NEW"): 2697 if new_alias is not None: 2698 self.raise_error("Duplicate NEW clause in REFERENCING") 2699 new_alias = alias 2700 else: 2701 break 2702 2703 if old_alias is None and new_alias is None: 2704 self.raise_error("REFERENCING clause requires at least OLD TABLE or NEW TABLE") 2705 2706 return self.expression(exp.TriggerReferencing(old=old_alias, new=new_alias)) 2707 2708 def _parse_trigger_for_each(self) -> str | None: 2709 if not self._match_text_seq("FOR", "EACH"): 2710 return None 2711 2712 return self._prev.text.upper() if self._match_texts(("ROW", "STATEMENT")) else None 2713 2714 def _parse_trigger_execute(self) -> exp.TriggerExecute | None: 2715 if not self._match(TokenType.EXECUTE): 2716 return None 2717 2718 if not self._match_set((TokenType.FUNCTION, TokenType.PROCEDURE)): 2719 self.raise_error("Expected FUNCTION or PROCEDURE after EXECUTE") 2720 2721 func_call = self._parse_column() 2722 return self.expression(exp.TriggerExecute(this=func_call)) 2723 2724 def _parse_property_before(self) -> exp.Expr | list[exp.Expr] | None: 2725 # only used for 
def _parse_property(self) -> exp.Expr | list[exp.Expr] | None:
    """Parse a single property, or return None if none is found.

    Tried in order: a keyword registered in PROPERTY_PARSERS, DEFAULT followed
    by such a keyword, COMPOUND SORTKEY, PARAMETER STYLE PANDAS, sequence
    properties, and finally a generic `key = value` pair.
    """
    if self._match_texts(self.PROPERTY_PARSERS):
        handler = self.PROPERTY_PARSERS[self._prev.text.upper()]
        return handler(self)

    if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
        handler = self.PROPERTY_PARSERS[self._prev.text.upper()]
        return handler(self, default=True)

    if self._match_text_seq("COMPOUND", "SORTKEY"):
        return self._parse_sortkey(compound=True)

    if self._match_text_seq("PARAMETER", "STYLE", "PANDAS"):
        return self.expression(exp.ParameterStyleProperty(this="PANDAS"))

    start = self._index

    sequence = self._parse_sequence_properties()
    if sequence:
        return sequence

    # Not a sequence property: rewind and try the generic `key = value` form.
    self._retreat(start)
    name = self._parse_column()

    if not self._match(TokenType.EQ):
        self._retreat(start)
        return None

    # A dotted key becomes exp.Dot, a plain one becomes exp.Var.
    if isinstance(name, exp.Column):
        name = exp.var(name.name) if len(name.parts) <= 1 else name.to_dot()

    parsed_value = self._parse_bitwise() or self._parse_var(any_token=True)

    # A bare identifier on the right-hand side is a Var, not a column reference.
    if isinstance(parsed_value, exp.Column):
        parsed_value = exp.var(parsed_value.name)

    return self.expression(exp.Property(this=name, value=parsed_value))
def _parse_retention_period(self) -> exp.Var:
    """Parse a retention period into a single Var such as `INFINITE` or `6 MONTHS`."""
    # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
    number = self._parse_number()
    unit = self._parse_var(any_token=True)

    # Join only the parts that were actually parsed. Formatting a missing part
    # would put the literal text "None" into the Var (e.g. "5 None").
    parts = [str(part) for part in (number, unit) if part]

    if not parts:
        self.raise_error("Expected a retention period")
        # Reached only when raise_error returns instead of raising.
        return exp.Var(this="")

    return exp.var(" ".join(parts))
def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
    """Parse `[=] {ON | OFF} [(FILTER_COLUMN = ..., RETENTION_PERIOD = ...)]`.

    `on` is True when ON is matched, and also when neither ON nor OFF is
    present; it is False only for an explicit OFF.
    """
    self._match(TokenType.EQ)
    on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
    prop = self.expression(exp.DataDeletionProperty(on=on))

    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            index = self._index

            if self._match_text_seq("FILTER_COLUMN", "="):
                prop.set("filter_column", self._parse_column())
            elif self._match_text_seq("RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            if self._index == index:
                # Unrecognized option: nothing was consumed, so another pass
                # would loop forever on the same token. Leave the remaining
                # tokens for the caller to deal with.
                break

    return prop
def _parse_procedure_option(self) -> exp.Expr | None:
    """Parse one procedure option: either `EXECUTE AS <principal>` or a PROCEDURE_OPTIONS keyword."""
    if not self._match_text_seq("EXECUTE", "AS"):
        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # The principal is one of the known EXECUTE AS keywords, else a string.
    principal = self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
    if not principal:
        principal = self._parse_string()

    return self.expression(exp.ExecuteAsProperty(this=principal))
def _parse_clustered_by(self) -> exp.ClusteredByProperty:
    """Parse `[BY] (cols) [SORTED BY (ordered cols)] [INTO] <n> [BUCKETS]`."""
    self._match_text_seq("BY")

    self._match_l_paren()
    columns = self._parse_csv(self._parse_column)
    self._match_r_paren()

    ordering = None
    if self._match_text_seq("SORTED", "BY"):
        self._match_l_paren()
        ordering = self._parse_csv(self._parse_ordered)
        self._match_r_paren()

    # INTO and BUCKETS are consumed when present but not required.
    self._match(TokenType.INTO)
    bucket_count = self._parse_number()
    self._match_text_seq("BUCKETS")

    clustered = exp.ClusteredByProperty(
        expressions=columns, sorted_by=ordering, buckets=bucket_count
    )
    return self.expression(clustered)
def _parse_datablocksize(
    self,
    default: bool | None = None,
    minimum: bool | None = None,
    maximum: bool | None = None,
) -> exp.DataBlocksizeProperty:
    """Parse `[=] <number> [BYTES | KBYTES | KILOBYTES]`.

    The `default`, `minimum` and `maximum` flags are passed through unchanged
    to the resulting property.
    """
    self._match(TokenType.EQ)
    block_size = self._parse_number()

    # The unit keeps the spelling used in the input.
    unit_text = self._prev.text if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")) else None

    prop = exp.DataBlocksizeProperty(
        size=block_size, units=unit_text, default=default, minimum=minimum, maximum=maximum
    )
    return self.expression(prop)
def _parse_locking(self) -> exp.LockingProperty:
    """Parse a locking clause.

    Reads, in order and all optional: the object kind (TABLE, VIEW, ROW or
    DATABASE), the object name (not for ROW), FOR or IN, the lock type, and
    OVERRIDE.
    """
    kind = None
    for token_type, label in (
        (TokenType.TABLE, "TABLE"),
        (TokenType.VIEW, "VIEW"),
        (TokenType.ROW, "ROW"),
    ):
        if self._match(token_type):
            kind = label
            break
    else:
        if self._match_text_seq("DATABASE"):
            kind = "DATABASE"

    # ROW locks, and clauses with no kind, carry no object name.
    this = self._parse_table_parts() if kind in ("DATABASE", "TABLE", "VIEW") else None

    for_or_in = None
    if self._match(TokenType.FOR):
        for_or_in = "FOR"
    elif self._match(TokenType.IN):
        for_or_in = "IN"

    lock_type = None
    if self._match_text_seq("ACCESS"):
        lock_type = "ACCESS"
    elif self._match_texts(("EXCL", "EXCLUSIVE")):
        # Both spellings normalize to EXCLUSIVE.
        lock_type = "EXCLUSIVE"
    else:
        for keyword in ("SHARE", "READ", "WRITE", "CHECKSUM"):
            if self._match_text_seq(keyword):
                lock_type = keyword
                break

    override = self._match_text_seq("OVERRIDE")

    return self.expression(
        exp.LockingProperty(
            this=this, kind=kind, for_or_in=for_or_in, lock_type=lock_type, override=override
        )
    )
# https://www.postgresql.org/docs/current/sql-createtable.html
def _parse_partitioned_of(self) -> exp.PartitionedOfProperty | None:
    """Parse `OF <parent> {DEFAULT | FOR VALUES <bound spec>}`.

    If OF is not the next token, the parser steps back one token and None is
    returned.
    """
    if not self._match_text_seq("OF"):
        self._retreat(self._index - 1)
        return None

    this = self._parse_table(schema=True)

    # Bind up front: raise_error does not always raise (it depends on the
    # parser's error level), and the return below would otherwise hit an
    # unbound local.
    expression: exp.Var | exp.PartitionBoundSpec | None = None

    if self._match(TokenType.DEFAULT):
        expression = exp.var("DEFAULT")
    elif self._match_text_seq("FOR", "VALUES"):
        expression = self._parse_partition_bound_spec()
    else:
        self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

    return self.expression(exp.PartitionedOfProperty(this=this, expression=expression))
def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
    """Parse the optional `AND [NO] STATISTICS` suffix of a WITH [NO] DATA clause.

    `statistics` is True, False, or None when the suffix is absent.
    """
    statistics = None
    for words, flag in (
        (("AND", "STATISTICS"), True),
        (("AND", "NO", "STATISTICS"), False),
    ):
        if self._match_text_seq(*words):
            statistics = flag
            break

    return self.expression(exp.WithDataProperty(no=no, statistics=statistics))
def _parse_returns(self) -> exp.ReturnsProperty:
    """Parse the body of a RETURNS clause.

    Handles `TABLE<...>`, `TABLE (...)`, `NULL ON NULL INPUT`, and a plain
    data type.
    """
    is_table = self._match(TokenType.TABLE)
    null = None
    value: exp.Expr | None = None

    if is_table and self._match(TokenType.LT):
        # TABLE<col type, ...>
        columns = self._parse_csv(self._parse_struct_types)
        value = self.expression(exp.Schema(this="TABLE", expressions=columns))
        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")
    elif is_table:
        value = self._parse_schema(exp.var("TABLE"))
    elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
        null = True
    else:
        value = self._parse_types()

    return self.expression(exp.ReturnsProperty(this=value, is_table=is_table, null=null))
def _parse_insert(self) -> exp.Insert | exp.MultitableInserts:
    """Parse the remainder of an INSERT statement.

    Returns exp.MultitableInserts when FIRST or ALL follows the leading
    modifiers, otherwise exp.Insert. The optional clauses after the target are
    attempted in a fixed order, each consuming tokens only if it matches.
    """
    comments: list[str] = []
    hint = self._parse_hint()
    overwrite = self._match(TokenType.OVERWRITE)
    ignore = self._match(TokenType.IGNORE)
    local = self._match_text_seq("LOCAL")
    alternative = None
    is_function = None
    this: exp.Expr | None

    if self._match_text_seq("DIRECTORY"):
        path = self._parse_var_or_string()
        row_format = self._parse_row_format(match_row=True)
        this = self.expression(exp.Directory(this=path, local=local, row_format=row_format))
    elif self._match_set((TokenType.FIRST, TokenType.ALL)):
        comments.extend(ensure_list(self._prev_comments))
        return self._parse_multitable_inserts(comments)
    else:
        if self._match(TokenType.OR):
            alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

        self._match(TokenType.INTO)
        comments.extend(ensure_list(self._prev_comments))
        self._match(TokenType.TABLE)
        is_function = self._match(TokenType.FUNCTION)

        this = self._parse_function() if is_function else self._parse_insert_table()

    # TSQL allows RETURNING before source
    returning = self._parse_returning()

    # The order of the assignments below is the order the clauses are tried in.
    stored = self._match_text_seq("STORED") and self._parse_stored()
    by_name = self._match_text_seq("BY", "NAME")
    exists = self._parse_exists()
    where = self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_disjunction()
    partition = self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by()
    settings = self._match_text_seq("SETTINGS") and self._parse_settings_property()
    default = self._match_text_seq("DEFAULT", "VALUES")
    expression = self._parse_derived_table_values() or self._parse_ddl_select()
    conflict = self._parse_on_conflict()
    returning = returning or self._parse_returning()
    source = self._match(TokenType.TABLE) and self._parse_table()

    return self.expression(
        exp.Insert(
            hint=hint,
            is_function=is_function,
            this=this,
            stored=stored,
            by_name=by_name,
            exists=exists,
            where=where,
            partition=partition,
            settings=settings,
            default=default,
            expression=expression,
            conflict=conflict,
            returning=returning,
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=source,
        ),
        comments=comments,
    )
def _parse_on_conflict(self) -> exp.OnConflict | None:
    """Parse `ON CONFLICT ...` or `ON DUPLICATE KEY ...`; return None if neither is present."""
    conflict = self._match_text_seq("ON", "CONFLICT")
    duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

    if not (conflict or duplicate):
        return None

    conflict_keys = None
    constraint = None

    # Only ON CONFLICT takes a target: a named constraint or a key list.
    if conflict and self._match_text_seq("ON", "CONSTRAINT"):
        constraint = self._parse_id_var()
    elif conflict and self._match(TokenType.L_PAREN):
        conflict_keys = self._parse_csv(self._parse_id_var)
        self._match_r_paren()

    index_predicate = self._parse_where()
    action = self._parse_var_from_options(self.CONFLICT_ACTIONS)

    expressions = None
    if self._prev.token_type == TokenType.UPDATE:
        # The action ended on UPDATE, so assignments follow (SET is optional).
        self._match(TokenType.SET)
        expressions = self._parse_csv(self._parse_equality)

    trailing_where = self._parse_where()

    return self.expression(
        exp.OnConflict(
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            index_predicate=index_predicate,
            constraint=constraint,
            where=trailing_where,
        )
    )
def _parse_row_format(
    self, match_row: bool = False
) -> exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty | None:
    """Parse a row format: `SERDE '<class>' [serde properties]` or `[DELIMITED] <clauses>`.

    With `match_row=True` the leading `ROW FORMAT` tokens are required, and
    None is returned when they are missing.
    """
    if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
        return None

    if self._match_text_seq("SERDE"):
        serde_class = self._parse_string()
        serde_properties = self._parse_serde_properties()
        return self.expression(
            exp.RowFormatSerdeProperty(this=serde_class, serde_properties=serde_properties)
        )

    self._match_text_seq("DELIMITED")

    # Each clause is tried once, in this order, and is followed by a string.
    delimited_clauses = (
        ("fields", ("FIELDS", "TERMINATED", "BY")),
        ("escaped", ("ESCAPED", "BY")),
        ("collection_items", ("COLLECTION", "ITEMS", "TERMINATED", "BY")),
        ("map_keys", ("MAP", "KEYS", "TERMINATED", "BY")),
        ("lines", ("LINES", "TERMINATED", "BY")),
        ("null", ("NULL", "DEFINED", "AS")),
    )

    kwargs = {}
    for arg_name, words in delimited_clauses:
        if self._match_text_seq(*words):
            kwargs[arg_name] = self._parse_string()

    return self.expression(exp.RowFormatDelimitedProperty(**kwargs))  # type: ignore
def _parse_delete(self) -> exp.Delete:
    """Parse the remainder of a DELETE statement into an `exp.Delete` node.

    The clauses are consumed strictly in the order below; each optional
    clause is only parsed when its introducing token matches.
    """
    optimizer_hint = self._parse_hint()

    # This handles MySQL's "Multiple-Table Syntax"
    # https://dev.mysql.com/doc/refman/8.0/en/delete.html
    if self._match(TokenType.FROM, advance=False):
        target_tables = None
    else:
        target_tables = self._parse_csv(self._parse_table) or None

    # RETURNING is tried here and again after WHERE.
    early_returning = self._parse_returning()

    source = self._match(TokenType.FROM) and self._parse_table(joins=True)
    using = self._match(TokenType.USING) and self._parse_csv(
        lambda: self._parse_table(joins=True)
    )
    cluster = self._match(TokenType.ON) and self._parse_on_property()
    where = self._parse_where()
    returning = early_returning or self._parse_returning()
    order = self._parse_order()
    limit = self._parse_limit()

    node = exp.Delete(
        hint=optimizer_hint,
        tables=target_tables,
        this=source,
        using=using,
        cluster=cluster,
        where=where,
        returning=returning,
        order=order,
        limit=limit,
    )
    return self.expression(node)
def _parse_cache(self) -> exp.Cache:
    """Parse the remainder of a CACHE statement into an `exp.Cache` node.

    Handles an optional LAZY word, an optional TABLE token, the table itself,
    an optional `OPTIONS (<string> = <string>)` pair, an optional alias token
    and finally a nested select.
    """
    is_lazy = self._match_text_seq("LAZY")
    self._match(TokenType.TABLE)
    cached_table = self._parse_table(schema=True)

    option_pair = []
    if self._match_text_seq("OPTIONS"):
        self._match_l_paren()
        option_key = self._parse_string()
        self._match(TokenType.EQ)
        option_pair = [option_key, self._parse_string()]
        self._match_r_paren()

    self._match(TokenType.ALIAS)
    query = self._parse_select(nested=True)

    return self.expression(
        exp.Cache(this=cached_table, lazy=is_lazy, options=option_pair, expression=query)
    )
def _parse_value(self, values: bool = True) -> exp.Tuple | None:
    """Parse one row of values into an `exp.Tuple`.

    A parenthesized, comma-separated list becomes a tuple of its items; a
    bare expression becomes a one-element tuple. Returns None when neither is
    present. The `values` argument is accepted but not read by this method.
    """

    def parse_item() -> exp.Expr | None:
        # DEFAULT is only treated as a value when the dialect allows it.
        is_default = self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT)
        return exp.var(self._prev.text.upper()) if is_default else self._parse_expression()

    if self._match(TokenType.L_PAREN):
        items = self._parse_csv(parse_item)
        self._match_r_paren()
    else:
        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        single = self._parse_expression()
        if not single:
            return None
        items = [single]

    return self.expression(exp.Tuple(expressions=items))
parse_query_modifiers 3761 # in case a modifier (e.g. join) is following 3762 if table and isinstance(this, exp.Values) and this.alias: 3763 alias = this.args["alias"].pop() 3764 this = exp.Table(this=this, alias=alias) 3765 3766 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3767 3768 return this 3769 3770 def _parse_select( 3771 self, 3772 nested: bool = False, 3773 table: bool = False, 3774 parse_subquery_alias: bool = True, 3775 parse_set_operation: bool = True, 3776 consume_pipe: bool = True, 3777 from_: exp.From | None = None, 3778 ) -> exp.Expr | None: 3779 query = self._parse_select_query( 3780 nested=nested, 3781 table=table, 3782 parse_subquery_alias=parse_subquery_alias, 3783 parse_set_operation=parse_set_operation, 3784 ) 3785 3786 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3787 if not query and from_: 3788 query = exp.select("*").from_(from_) 3789 if isinstance(query, exp.Query): 3790 query = self._parse_pipe_syntax_query(query) 3791 query = query.subquery(copy=False) if query and table else query 3792 3793 return query 3794 3795 def _parse_select_query( 3796 self, 3797 nested: bool = False, 3798 table: bool = False, 3799 parse_subquery_alias: bool = True, 3800 parse_set_operation: bool = True, 3801 ) -> exp.Expr | None: 3802 cte = self._parse_with() 3803 3804 if cte: 3805 this = self._parse_statement() 3806 3807 if not this: 3808 self.raise_error("Failed to parse any statement following CTE") 3809 return cte 3810 3811 while isinstance(this, exp.Subquery) and this.is_wrapper: 3812 this = this.this 3813 3814 assert this is not None 3815 if "with_" in this.arg_types: 3816 this.set("with_", cte) 3817 else: 3818 self.raise_error(f"{this.key} does not support CTE") 3819 this = cte 3820 3821 return this 3822 3823 # duckdb supports leading with FROM x 3824 from_ = ( 3825 self._parse_from(joins=True, consume_pipe=True) 3826 if self._match(TokenType.FROM, advance=False) 3827 else None 3828 ) 3829 3830 if 
self._match(TokenType.SELECT): 3831 comments = self._prev_comments 3832 3833 hint = self._parse_hint() 3834 3835 if self._next and not self._next.token_type == TokenType.DOT: 3836 all_ = self._match(TokenType.ALL) 3837 matched_distinct = self._match_set(self.DISTINCT_TOKENS) 3838 else: 3839 all_, matched_distinct = None, False 3840 3841 kind = ( 3842 self._prev.text.upper() 3843 if self._match(TokenType.ALIAS) and self._match_texts(("STRUCT", "VALUE")) 3844 else None 3845 ) 3846 3847 distinct: exp.Expr | None = ( 3848 self.expression( 3849 exp.Distinct( 3850 on=self._parse_value(values=False) if self._match(TokenType.ON) else None 3851 ) 3852 ) 3853 if matched_distinct 3854 else None 3855 ) 3856 3857 operation_modifiers = [] 3858 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3859 operation_modifiers.append(exp.var(self._prev.text.upper())) 3860 3861 limit = self._parse_limit(top=True) 3862 3863 # Some dialects (e.g. Redshift, T-SQL) allow SELECT TOP N DISTINCT ... 3864 if limit and not matched_distinct and not all_: 3865 matched_distinct = self._match_set(self.DISTINCT_TOKENS) 3866 if matched_distinct: 3867 distinct = self.expression( 3868 exp.Distinct( 3869 on=self._parse_value(values=False) 3870 if self._match(TokenType.ON) 3871 else None 3872 ) 3873 ) 3874 else: 3875 all_ = self._match(TokenType.ALL) 3876 3877 if all_ and distinct: 3878 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3879 3880 projections, exclude = self._parse_projections() 3881 3882 this = self.expression( 3883 exp.Select( 3884 kind=kind, 3885 hint=hint, 3886 distinct=distinct, 3887 expressions=projections, 3888 limit=limit, 3889 exclude=exclude, 3890 operation_modifiers=operation_modifiers or None, 3891 ) 3892 ) 3893 this.comments = comments 3894 3895 into = self._parse_into() 3896 if into: 3897 this.set("into", into) 3898 3899 if not from_: 3900 from_ = self._parse_from() 3901 3902 if from_: 3903 this.set("from_", from_) 3904 3905 this = 
def _parse_recursive_with_search(self) -> exp.RecursiveWithSearch | None:
    """Parse an optional SEARCH clause of a recursive WITH.

    Returns None unless one of the `RECURSIVE_CTE_SEARCH_KIND` words follows
    the (optional) SEARCH word. Otherwise returns an `exp.RecursiveWithSearch`
    carrying the upper-cased kind, the identifier after `FIRST BY`, and the
    identifiers introduced by SET and USING when those words are present.
    """
    self._match_text_seq("SEARCH")

    search_kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()
    if search_kind:
        self._match_text_seq("FIRST", "BY")

        target = self._parse_id_var()
        set_target = self._match_text_seq("SET") and self._parse_id_var()
        using_target = self._match_text_seq("USING") and self._parse_id_var()

        return self.expression(
            exp.RecursiveWithSearch(
                kind=search_kind,
                this=target,
                expression=set_target,
                using=using_target,
            )
        )

    return None
def _parse_cte(self) -> exp.CTE | None:
    """Parse a single common table expression.

    Reports "Expected CTE to have alias" when no alias name is found. Returns
    None, after rewinding to the starting token, when the alias token is
    missing and `OPTIONAL_ALIAS_TOKEN_CTE` is falsy. A CTE whose body is an
    `exp.Values` is rewritten to `SELECT * FROM <values>`, aliasing the values
    as `_values` when they carry no alias of their own.
    """
    start = self._index

    cte_alias = self._parse_table_alias(self.ID_VAR_TOKENS)
    if not (cte_alias and cte_alias.this):
        self.raise_error("Expected CTE to have alias")

    key_columns = None
    if self._match_text_seq("USING", "KEY"):
        key_columns = self._parse_wrapped_id_vars()

    if not (self._match(TokenType.ALIAS) or self.OPTIONAL_ALIAS_TOKEN_CTE):
        self._retreat(start)
        return None

    # Captured before any further tokens are consumed.
    leading_comments = self._prev_comments

    if self._match_text_seq("NOT", "MATERIALIZED"):
        is_materialized = False
    else:
        is_materialized = True if self._match_text_seq("MATERIALIZED") else None

    body = self._parse_wrapped(self._parse_statement)
    node = self.expression(
        exp.CTE(
            this=body,
            alias=cte_alias,
            materialized=is_materialized,
            key_expressions=key_columns,
        ),
        comments=leading_comments,
    )

    inner = node.this
    if isinstance(inner, exp.Values):
        source = inner if inner.alias else exp.alias_(inner, "_values", table=True)
        node.set("this", exp.select("*").from_(source))

    return node
def _parse_subquery(
    self, this: exp.Expr | None, parse_alias: bool = True
) -> exp.Subquery | None:
    """Wrap `this` in an `exp.Subquery`, parsing its trailing modifiers.

    Returns None when `this` is falsy. Otherwise pivots, an alias (only when
    `parse_alias` is true) and a table sample are parsed, in that order, and
    attached to the new node.
    """
    if this:
        pivots = self._parse_pivots()

        alias = None
        if parse_alias:
            alias = self._parse_table_alias()

        sample = self._parse_table_sample()

        return self.expression(
            exp.Subquery(this=this, pivots=pivots, alias=alias, sample=sample)
        )

    return None
= table.to_column() 4088 unnest = exp.Unnest(expressions=[table_as_column]) 4089 4090 # Table.to_column creates a parent Alias node that we want to convert to 4091 # a TableAlias and attach to the Unnest, so it matches the parser's output 4092 if isinstance(table.args.get("alias"), exp.TableAlias): 4093 table_as_column.replace(table_as_column.this) 4094 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 4095 4096 table.replace(unnest) 4097 4098 refs.add(normalized_table.alias_or_name) 4099 4100 return this 4101 4102 @t.overload 4103 def _parse_query_modifiers(self, this: E) -> E: ... 4104 4105 @t.overload 4106 def _parse_query_modifiers(self, this: None) -> None: ... 4107 4108 def _parse_query_modifiers(self, this): 4109 if isinstance(this, self.MODIFIABLES): 4110 for join in self._parse_joins(): 4111 this.append("joins", join) 4112 for lateral in iter(self._parse_lateral, None): 4113 this.append("laterals", lateral) 4114 4115 while True: 4116 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 4117 modifier_token = self._curr 4118 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 4119 key, expression = parser(self) 4120 4121 if expression: 4122 if this.args.get(key): 4123 self.raise_error( 4124 f"Found multiple '{modifier_token.text.upper()}' clauses", 4125 token=modifier_token, 4126 ) 4127 4128 this.set(key, expression) 4129 if key == "limit": 4130 offset = expression.args.get("offset") 4131 expression.set("offset", None) 4132 4133 if offset: 4134 offset = exp.Offset(expression=offset) 4135 this.set("offset", offset) 4136 4137 limit_by_expressions = expression.expressions 4138 expression.set("expressions", None) 4139 offset.set("expressions", limit_by_expressions) 4140 continue 4141 break 4142 4143 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"): 4144 this = self._implicit_unnests_to_explicit(this) 4145 4146 return this 4147 4148 def _parse_hint_fallback_to_string(self) -> exp.Hint | None: 4149 
def _parse_hint_body(self) -> exp.Hint | None:
    """Parse the contents of a hint into an `exp.Hint`.

    Comma-separated groups of hint function calls or upper-cased vars are
    collected until a group comes back empty. If a `ParseError` is raised, or
    tokens are left over afterwards, the parser rewinds to where it started
    and the hint is re-parsed through `_parse_hint_fallback_to_string`.
    """
    start_index = self._index
    parsed_cleanly = True

    hints = []
    try:
        while True:
            group = self._parse_csv(
                lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
            )
            # An empty group means nothing more could be parsed.
            if group == []:
                break
            hints.extend(group)
    except ParseError:
        parsed_cleanly = False

    if parsed_cleanly and not self._curr:
        return self.expression(exp.Hint(expressions=hints))

    self._retreat(start_index)
    return self._parse_hint_fallback_to_string()
self.expression( 4216 exp.MatchRecognizeMeasure( 4217 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 4218 this=self._parse_expression(), 4219 ) 4220 ) 4221 4222 def _parse_match_recognize(self) -> exp.MatchRecognize | None: 4223 if not self._match(TokenType.MATCH_RECOGNIZE): 4224 return None 4225 4226 self._match_l_paren() 4227 4228 partition = self._parse_partition_by() 4229 order = self._parse_order() 4230 4231 measures = ( 4232 self._parse_csv(self._parse_match_recognize_measure) 4233 if self._match_text_seq("MEASURES") 4234 else None 4235 ) 4236 4237 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 4238 rows = exp.var("ONE ROW PER MATCH") 4239 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 4240 text = "ALL ROWS PER MATCH" 4241 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 4242 text += " SHOW EMPTY MATCHES" 4243 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 4244 text += " OMIT EMPTY MATCHES" 4245 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 4246 text += " WITH UNMATCHED ROWS" 4247 rows = exp.var(text) 4248 else: 4249 rows = None 4250 4251 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 4252 text = "AFTER MATCH SKIP" 4253 if self._match_text_seq("PAST", "LAST", "ROW"): 4254 text += " PAST LAST ROW" 4255 elif self._match_text_seq("TO", "NEXT", "ROW"): 4256 text += " TO NEXT ROW" 4257 elif self._match_text_seq("TO", "FIRST"): 4258 text += f" TO FIRST {self._advance_any().text}" # type: ignore 4259 elif self._match_text_seq("TO", "LAST"): 4260 text += f" TO LAST {self._advance_any().text}" # type: ignore 4261 after = exp.var(text) 4262 else: 4263 after = None 4264 4265 if self._match_text_seq("PATTERN"): 4266 self._match_l_paren() 4267 4268 if not self._curr: 4269 self.raise_error("Expecting )", self._curr) 4270 4271 paren = 1 4272 start = self._curr 4273 4274 while self._curr and paren > 0: 4275 if self._curr.token_type == TokenType.L_PAREN: 4276 paren += 1 4277 if 
self._curr.token_type == TokenType.R_PAREN: 4278 paren -= 1 4279 4280 end = self._prev 4281 self._advance() 4282 4283 if paren > 0: 4284 self.raise_error("Expecting )", self._curr) 4285 4286 pattern = exp.var(self._find_sql(start, end)) 4287 else: 4288 pattern = None 4289 4290 define = ( 4291 self._parse_csv(self._parse_name_as_expression) 4292 if self._match_text_seq("DEFINE") 4293 else None 4294 ) 4295 4296 self._match_r_paren() 4297 4298 return self.expression( 4299 exp.MatchRecognize( 4300 partition_by=partition, 4301 order=order, 4302 measures=measures, 4303 rows=rows, 4304 after=after, 4305 pattern=pattern, 4306 define=define, 4307 alias=self._parse_table_alias(), 4308 ) 4309 ) 4310 4311 def _parse_lateral(self) -> exp.Lateral | None: 4312 cross_apply: bool | None = None 4313 if self._match_pair(TokenType.CROSS, TokenType.APPLY): 4314 cross_apply = True 4315 elif self._match_pair(TokenType.OUTER, TokenType.APPLY): 4316 cross_apply = False 4317 4318 if cross_apply is not None: 4319 this = self._parse_select(table=True) 4320 view = None 4321 outer = None 4322 elif self._match(TokenType.LATERAL): 4323 this = self._parse_select(table=True) 4324 view = self._match(TokenType.VIEW) 4325 outer = self._match(TokenType.OUTER) 4326 else: 4327 return None 4328 4329 if not this: 4330 this = ( 4331 self._parse_unnest() 4332 or self._parse_function() 4333 or self._parse_id_var(any_token=False) 4334 ) 4335 4336 while self._match(TokenType.DOT): 4337 this = exp.Dot( 4338 this=this, 4339 expression=self._parse_function() or self._parse_id_var(any_token=False), 4340 ) 4341 4342 ordinality: bool | None = None 4343 4344 if view: 4345 table = self._parse_id_var(any_token=False) 4346 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 4347 table_alias: exp.TableAlias | None = self.expression( 4348 exp.TableAlias(this=table, columns=columns) 4349 ) 4350 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 4351 # We move the alias from 
def _parse_using_identifiers(self) -> list[exp.Expr]:
    """Parse the (optionally parenthesized) comma-separated list after USING.

    Each item is parsed as a column; when the result is an `exp.Column`, its
    inner `this` is returned in its place.
    """

    def unwrap_column() -> exp.Expr | None:
        parsed = self._parse_column()
        return parsed.this if isinstance(parsed, exp.Column) else parsed

    return self._parse_wrapped_csv(unwrap_column, optional=True)
kind.token_type == TokenType.STRAIGHT_JOIN) 4411 join_comments = self._prev_comments 4412 4413 if not skip_join_token and not join: 4414 self._retreat(index) 4415 kind = None 4416 method = None 4417 side = None 4418 4419 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 4420 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 4421 4422 if not skip_join_token and not join and not outer_apply and not cross_apply: 4423 return None 4424 4425 kwargs: dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 4426 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 4427 kwargs["expressions"] = self._parse_csv( 4428 lambda: self._parse_table(parse_bracket=parse_bracket) 4429 ) 4430 4431 if method: 4432 kwargs["method"] = method.text.upper() 4433 if side: 4434 kwargs["side"] = side.text.upper() 4435 if kind: 4436 kwargs["kind"] = kind.text.upper() 4437 if hint: 4438 kwargs["hint"] = hint 4439 4440 if self._match(TokenType.MATCH_CONDITION): 4441 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 4442 4443 if self._match(TokenType.ON): 4444 kwargs["on"] = self._parse_disjunction() 4445 elif self._match(TokenType.USING): 4446 kwargs["using"] = self._parse_using_identifiers() 4447 elif ( 4448 not method 4449 and not (outer_apply or cross_apply) 4450 and not isinstance(kwargs["this"], exp.Unnest) 4451 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 4452 ): 4453 index = self._index 4454 joins: list | None = list(self._parse_joins()) 4455 4456 if joins and self._match(TokenType.ON): 4457 kwargs["on"] = self._parse_disjunction() 4458 elif joins and self._match(TokenType.USING): 4459 kwargs["using"] = self._parse_using_identifiers() 4460 else: 4461 joins = None 4462 self._retreat(index) 4463 4464 kwargs["this"].set("joins", joins if joins else None) 4465 4466 kwargs["pivots"] = self._parse_pivots() 4467 4468 comments = [c for token in (method, side, 
def _parse_index_params(self) -> exp.IndexParameters:
    """Parse the optional parameters of an index into `exp.IndexParameters`.

    The pieces are tried in a fixed order: USING, a parenthesized column
    list, INCLUDE, partition-by, WITH storage properties,
    `USING INDEX TABLESPACE`, WHERE and ON. Anything absent is left as None
    (or as the falsy result of the failed match for WITH).
    """
    using = None
    if self._match(TokenType.USING):
        using = self._parse_var(any_token=True)

    # advance=False: only check for the parenthesis here.
    columns = (
        self._parse_wrapped_csv(self._parse_with_operator)
        if self._match(TokenType.L_PAREN, advance=False)
        else None
    )

    include = None
    if self._match_text_seq("INCLUDE"):
        include = self._parse_wrapped_id_vars()

    partition_by = self._parse_partition_by()
    with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()

    tablespace = None
    if self._match_text_seq("USING", "INDEX", "TABLESPACE"):
        tablespace = self._parse_var(any_token=True)

    where = self._parse_where()

    on = None
    if self._match(TokenType.ON):
        on = self._parse_field()

    node = exp.IndexParameters(
        using=using,
        columns=columns,
        include=include,
        partition_by=partition_by,
        where=where,
        with_storage=with_storage,
        tablespace=tablespace,
        on=on,
    )
    return self.expression(node)
def _parse_table_part(self, schema: bool = False) -> exp.Expr | None:
    """Parse one dot-separated component of a table reference.

    Alternatives are tried in order and the first truthy result wins: a
    function (skipped when `schema` is true), an identifier/var, a string
    read as an identifier, and finally a placeholder, whose result is
    returned as-is.
    """
    if not schema:
        function = self._parse_function(optional_parens=False)
        if function:
            return function

    identifier = self._parse_id_var(any_token=False)
    if identifier:
        return identifier

    string_identifier = self._parse_string_as_identifier()
    if string_identifier:
        return string_identifier

    return self._parse_placeholder()
def _parse_table_parts(
    self,
    schema: bool = False,
    is_db_reference: bool = False,
    wildcard: bool = False,
    fast: bool = False,
) -> exp.Table | exp.Dot | None:
    """Parse a dot-separated table reference into an `exp.Table`.

    Args:
        schema: Forwarded to `_parse_table_part` for every parsed part.
        is_db_reference: Shift the parsed parts one slot left so they are
            stored as `catalog`/`db` and the table slot stays empty.
        wildcard: Accept a trailing `*` (when `_is_connected()` holds) and
            fold it into the last identifier.
        fast: Delegate the whole parse to `_parse_table_parts_fast`.

    Returns:
        The table node, with any following changes / historical-data / pivot
        clauses attached.
    """
    if fast:
        return self._parse_table_parts_fast()

    catalog: exp.Expr | str | None = None
    db: exp.Expr | str | None = None
    table: exp.Expr | str | None = self._parse_table_part(schema=schema)

    while self._match(TokenType.DOT):
        if not catalog:
            # Rotate the parts one slot to the left; "" used for tsql FROM a..b case
            catalog, db = db, table
            table = self._parse_table_part(schema=schema) or ""
            continue

        # This allows nesting the table in arbitrarily many dot expressions if needed
        nested = exp.Dot(this=table, expression=self._parse_table_part(schema=schema))
        table = self.expression(nested)

    if wildcard and self._is_connected():
        is_identifier = isinstance(table, exp.Identifier)
        if (is_identifier or not table) and self._match(TokenType.STAR):
            if is_identifier:
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

    if is_db_reference:
        catalog, db, table = db, table, None
        if not db:
            self.raise_error(f"Expected database name but got {self._curr}")
    elif not table:
        self.raise_error(f"Expected table name but got {self._curr}")

    table = self.expression(exp.Table(this=table, db=db, catalog=catalog))

    # Bubble up comments from identifier parts to the Table
    bubbled = []
    for part in table.parts:
        part_comments = part.pop_comments()
        if part_comments:
            bubbled.extend(part_comments)
    if bubbled:
        table.add_comments(bubbled)

    # Optional trailing clauses, attempted in this fixed order
    trailing_clauses = (
        ("changes", self._parse_changes),
        ("when", self._parse_historical_data),
        ("pivots", self._parse_pivots),
    )
    for arg_name, parse_clause in trailing_clauses:
        if clause := parse_clause():
            table.set(arg_name, clause)

    return table
None: 4744 curr_tt = self._curr.token_type 4745 next_tt = self._next.token_type 4746 4747 fast_terminators = self.TABLE_TERMINATORS 4748 4749 # only return the table if we're sure there are no other operators 4750 # MATCH_CONDITION is a special case because it accepts any alias before it like LIMIT 4751 if curr_tt in fast_terminators and next_tt != TokenType.MATCH_CONDITION: 4752 return table 4753 4754 postfix_tokens = self.TABLE_POSTFIX_TOKENS 4755 4756 if curr_tt not in postfix_tokens and next_tt not in postfix_tokens: 4757 if alias := self._parse_table_alias( 4758 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 4759 ): 4760 table.set("alias", alias) 4761 4762 if self._curr.token_type in fast_terminators: 4763 return table 4764 4765 self._retreat(index) 4766 4767 if stream := self._parse_stream(): 4768 return stream 4769 4770 if lateral := self._parse_lateral(): 4771 return lateral 4772 4773 if unnest := self._parse_unnest(): 4774 return unnest 4775 4776 if values := self._parse_derived_table_values(): 4777 return values 4778 4779 if subquery := self._parse_select(table=True, consume_pipe=consume_pipe): 4780 if not subquery.args.get("pivots"): 4781 subquery.set("pivots", self._parse_pivots()) 4782 if joins: 4783 for join in self._parse_joins(): 4784 subquery.append("joins", join) 4785 return subquery 4786 4787 bracket = parse_bracket and self._parse_bracket(None) 4788 bracket = self.expression(exp.Table(this=bracket)) if bracket else None 4789 4790 rows_from_tables = ( 4791 self._parse_wrapped_csv(self._parse_table) 4792 if self._match_text_seq("ROWS", "FROM") 4793 else None 4794 ) 4795 rows_from = ( 4796 self.expression(exp.Table(rows_from=rows_from_tables)) if rows_from_tables else None 4797 ) 4798 4799 only = self._match(TokenType.ONLY) 4800 4801 this = t.cast( 4802 exp.Expr, 4803 bracket 4804 or rows_from 4805 or self._parse_bracket( 4806 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4807 ), 4808 ) 4809 4810 if only: 4811 
def _parse_version(self) -> exp.Version | None:
    """Parse a snapshot/version clause into an `exp.Version`.

    Returns None when the current token is neither `TIMESTAMP_SNAPSHOT` nor
    `VERSION_SNAPSHOT`. Otherwise the clause kind is one of the matched
    `FROM`/`BETWEEN` keyword, "CONTAINED IN", "ALL" or "AS OF".
    """
    snapshot_tokens = (
        (TokenType.TIMESTAMP_SNAPSHOT, "TIMESTAMP"),
        (TokenType.VERSION_SNAPSHOT, "VERSION"),
    )
    # Lazily tries each token in order and stops at the first match
    this = next(
        (label for token_type, label in snapshot_tokens if self._match(token_type)),
        None,
    )
    if this is None:
        return None

    bound: exp.Expr | None = None

    if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
        kind = self._prev.text.upper()
        lower = self._parse_bitwise()
        self._match_texts(("TO", "AND"))
        upper = self._parse_bitwise()
        bound = self.expression(exp.Tuple(expressions=[lower, upper]))
    elif self._match_text_seq("CONTAINED", "IN"):
        kind = "CONTAINED IN"
        members = self._parse_wrapped_csv(self._parse_bitwise)
        bound = self.expression(exp.Tuple(expressions=members))
    elif self._match(TokenType.ALL):
        kind = "ALL"
    else:
        # The AS OF keywords are optional here
        self._match_text_seq("AS", "OF")
        kind = "AS OF"
        bound = self._parse_type()

    return self.expression(exp.Version(this=this, expression=bound, kind=kind))
def _parse_table_sample(self, as_modifier: bool = False) -> exp.TableSample | None:
    """Parse a table sampling clause into an `exp.TableSample`.

    Args:
        as_modifier: When true, the `USING SAMPLE` keyword pair is accepted as
            an opener in addition to the `TABLE_SAMPLE` token.

    Returns:
        The parsed sample node, or None when no opener is matched.
    """
    if not self._match(TokenType.TABLE_SAMPLE) and not (
        as_modifier and self._match_text_seq("USING", "SAMPLE")
    ):
        return None

    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    size = None
    seed = None

    # An optional sampling method may precede the (optionally parenthesized) arguments
    method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
    matched_l_paren = self._match(TokenType.L_PAREN)

    if self.TABLESAMPLE_CSV:
        num = None
        expressions = self._parse_csv(self._parse_primary)
    else:
        expressions = None
        num = (
            self._parse_factor()
            if self._match(TokenType.NUMBER, advance=False)
            else self._parse_primary() or self._parse_placeholder()
        )

    if self._match_text_seq("BUCKET"):
        # BUCKET <numerator> [OUT OF] <denominator> [ON] <field>
        bucket_numerator = self._parse_number()
        self._match_text_seq("OUT", "OF")
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
        percent = num
    elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
        size = num
    else:
        # A bare number is a percentage when the dialect says sizes are percents
        percent = num

    if matched_l_paren:
        self._match_r_paren()

    if self._match(TokenType.L_PAREN):
        # Trailing "(<method> [, <seed>])" form; overrides any method parsed above
        method = self._parse_var(upper=True)
        seed = self._match(TokenType.COMMA) and self._parse_number()
        self._match_r_paren()
    elif self._match_texts(("SEED", "REPEATABLE")):
        seed = self._parse_wrapped(self._parse_number)

    if not method and self.DEFAULT_SAMPLING_METHOD:
        method = exp.var(self.DEFAULT_SAMPLING_METHOD)

    return self.expression(
        exp.TableSample(
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )
    )
# https://duckdb.org/docs/sql/statements/pivot
def _parse_simplified_pivot(self, is_unpivot: bool | None = None) -> exp.Pivot:
    """Parse the simplified `PIVOT`/`UNPIVOT` statement form into an `exp.Pivot`.

    Parses, in order: the source table, an optional `ON` list, the optional
    `INTO` columns, an optional `USING` list and an optional group clause.

    Args:
        is_unpivot: Stored unchanged as the node's `unpivot` argument.
    """

    def _on_item() -> exp.Expr | None:
        operand = self._parse_bitwise()

        if self._match(TokenType.IN):
            # PIVOT ... ON col IN (row_val1, row_val2)
            return self._parse_in(operand)

        # UNPIVOT ... ON (col1, col2, col3) AS row_val
        has_alias = self._match(TokenType.ALIAS, advance=False)
        return self._parse_alias(operand) if has_alias else operand

    def _using_item() -> exp.Expr | None:
        return self._parse_alias(self._parse_column())

    source = self._parse_table()
    on_items = self._match(TokenType.ON) and self._parse_csv(_on_item)
    into = self._parse_unpivot_columns()
    using_items = self._match(TokenType.USING) and self._parse_csv(_using_item)
    grouping = self._parse_group()

    pivot = exp.Pivot(
        this=source,
        expressions=on_items,
        using=using_items,
        group=grouping,
        unpivot=is_unpivot,
        into=into,
    )
    return self.expression(pivot)
def _parse_pivot_aggregation(self) -> exp.Expr | None:
    """Parse one (optionally aliased) aggregation function of a PIVOT list.

    Returns None when no function is found right after a comma; in any other
    no-function case an error is reported through `raise_error`.
    """
    aggregation = self._parse_function()
    if aggregation:
        return self._parse_alias(aggregation)

    if self._prev.token_type == TokenType.COMMA:
        return None

    self.raise_error("Expecting an aggregation function in PIVOT")
    # Only reached when raise_error returns instead of raising
    return self._parse_alias(aggregation)
expressions=expressions, 5187 fields=fields, 5188 unpivot=unpivot, 5189 include_nulls=include_nulls, 5190 default_on_null=default_on_null, 5191 group=group, 5192 ) 5193 ) 5194 5195 if unpivot: 5196 pivot.set("expressions", [_unpivot_target(e) for e in pivot.expressions]) 5197 for pivot_field in pivot.fields: 5198 if isinstance(pivot_field, exp.In): 5199 pivot_field.set("this", _unpivot_target(pivot_field.this)) 5200 5201 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 5202 pivot.set("alias", self._parse_table_alias()) 5203 5204 if not unpivot: 5205 names = self._pivot_column_names(t.cast(list[exp.Expr], expressions)) 5206 5207 columns: list[exp.Expr] = [] 5208 all_fields = [] 5209 for pivot_field in pivot.fields: 5210 pivot_field_expressions = pivot_field.expressions 5211 5212 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 5213 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 5214 continue 5215 5216 all_fields.append( 5217 [ 5218 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 5219 for fld in pivot_field_expressions 5220 ] 5221 ) 5222 5223 if all_fields: 5224 if names: 5225 all_fields.append(names) 5226 5227 # Generate all possible combinations of the pivot columns 5228 # e.g PIVOT(sum(...) 
def _pivot_column_names(self, aggregations: list[exp.Expr]) -> list[str]:
    """Return the aliases of the given aggregations, skipping those without one."""
    names: list[str] = []
    for aggregation in aggregations:
        if not aggregation.alias:
            continue
        names.append(aggregation.alias)
    return names
def _parse_cube_or_rollup(self, with_prefix: bool = False) -> exp.Cube | exp.Rollup | None:
    """Parse a `CUBE` or `ROLLUP` grouping element.

    Args:
        with_prefix: When true, no argument list is parsed and the node gets
            an empty expression list.

    Returns:
        An `exp.Cube` or `exp.Rollup`, or None when neither token matches.
    """
    if not self._match(TokenType.CUBE):
        if not self._match(TokenType.ROLLUP):
            return None
        node_type: type[exp.Cube | exp.Rollup] = exp.Rollup
    else:
        node_type = exp.Cube

    if with_prefix:
        grouped = []
    else:
        grouped = self._parse_wrapped_csv(self._parse_bitwise)

    return self.expression(node_type(expressions=grouped))
def _parse_connect_with_prior(self) -> exp.Expr | None:
    """Parse a disjunction while `PRIOR` is registered as a no-paren function.

    A `"PRIOR"` entry is installed in `self.NO_PAREN_FUNCTION_PARSERS` for the
    duration of the parse and the mapping is put back to its previous state
    afterwards, including when parsing raises.

    Returns:
        Whatever `_parse_disjunction` returns.
    """
    parsers = self.NO_PAREN_FUNCTION_PARSERS

    # Remember what was registered before, so a nested call (e.g. a subquery with
    # its own CONNECT BY) restores the outer entry instead of deleting it.
    had_prior = "PRIOR" in parsers
    previous = parsers.get("PRIOR")

    parsers["PRIOR"] = lambda self: self.expression(exp.Prior(this=self._parse_bitwise()))
    try:
        return self._parse_disjunction()
    finally:
        # Always undo the registration; the mapping is reached through the parser
        # object and would otherwise keep the temporary entry after a parse error.
        if had_prior:
            parsers["PRIOR"] = previous
        else:
            parsers.pop("PRIOR", None)
def _parse_ordered(
    self, parse_method: t.Callable[[], exp.Expr | None] | None = None
) -> exp.Ordered | None:
    """Parse a single ordering term into an `exp.Ordered`.

    Args:
        parse_method: Optional callable used to parse the ordered expression;
            `_parse_disjunction` is used when it is not given.

    Returns:
        The ordered node, or None when no expression could be parsed.
    """
    this = parse_method() if parse_method else self._parse_disjunction()
    if not this:
        return None

    if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
        this = exp.var("ALL")

    asc = self._match(TokenType.ASC)
    desc: bool | None
    if self._match(TokenType.DESC):
        desc = True
    elif asc:
        desc = False
    else:
        desc = None

    saw_nulls_first = self._match_text_seq("NULLS", "FIRST")
    saw_nulls_last = self._match_text_seq("NULLS", "LAST")
    nulls_first = saw_nulls_first or False

    if not (saw_nulls_first or saw_nulls_last):
        # No explicit NULLS FIRST/LAST: derive it from the dialect's null ordering
        null_ordering = self.dialect.NULL_ORDERING
        nulls_are_small = null_ordering == "nulls_are_small"
        if bool(desc) != nulls_are_small and null_ordering != "nulls_are_last":
            nulls_first = True

    with_fill = None
    if self._match_text_seq("WITH", "FILL"):
        fill_from = self._match(TokenType.FROM) and self._parse_bitwise()
        fill_to = self._match_text_seq("TO") and self._parse_bitwise()
        fill_step = self._match_text_seq("STEP") and self._parse_bitwise()
        with_fill = self.expression(
            exp.WithFill(
                from_=fill_from,
                to=fill_to,
                step=fill_step,
                interpolate=self._parse_interpolate(),
            )
        )

    ordered = exp.Ordered(this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill)
    return self.expression(ordered)
TokenType.MOD)) 5458 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 5459 self._match_text_seq("ONLY") 5460 with_ties = self._match_text_seq("WITH", "TIES") 5461 5462 if not (percent or rows or with_ties): 5463 return None 5464 5465 return self.expression(exp.LimitOptions(percent=percent, rows=rows, with_ties=with_ties)) 5466 5467 def _parse_limit( 5468 self, 5469 this: exp.Expr | None = None, 5470 top: bool = False, 5471 skip_limit_token: bool = False, 5472 ) -> exp.Expr | None: 5473 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 5474 comments = self._prev_comments 5475 if top: 5476 limit_paren = self._match(TokenType.L_PAREN) 5477 expression = ( 5478 self._parse_term() or self._parse_select() 5479 if limit_paren 5480 else self._parse_number() 5481 ) 5482 5483 if limit_paren: 5484 self._match_r_paren() 5485 5486 else: 5487 # Parsing LIMIT x% (i.e x PERCENT) as a term leads to an error, since 5488 # we try to build an exp.Mod expr. For that matter, we backtrack and instead 5489 # consume the factor plus parse the percentage separately 5490 index = self._index 5491 expression = self._try_parse(self._parse_term) 5492 if isinstance(expression, exp.Mod): 5493 self._retreat(index) 5494 expression = self._parse_factor() 5495 elif not expression: 5496 expression = self._parse_factor() 5497 limit_options = self._parse_limit_options() 5498 5499 if self._match(TokenType.COMMA): 5500 offset = expression 5501 expression = self._parse_term() 5502 else: 5503 offset = None 5504 5505 limit_exp = self.expression( 5506 exp.Limit( 5507 this=this, 5508 expression=expression, 5509 offset=offset, 5510 limit_options=limit_options, 5511 expressions=self._parse_limit_by(), 5512 ), 5513 comments=comments, 5514 ) 5515 5516 return limit_exp 5517 5518 if self._match(TokenType.FETCH): 5519 direction = ( 5520 self._prev.text.upper() 5521 if self._match_set((TokenType.FIRST, TokenType.NEXT)) 5522 else "FIRST" 5523 ) 5524 5525 count = 
def _parse_offset(self, this: exp.Expr | None = None) -> exp.Expr | None:
    """Parse an `OFFSET` clause, returning `this` untouched when there is none.

    Args:
        this: The expression the offset is attached to.

    Returns:
        An `exp.Offset` wrapping `this`, or `this` itself if no OFFSET token matches.
    """
    if self._match(TokenType.OFFSET):
        count = self._parse_term()
        # Optional trailing ROW / ROWS keyword
        self._match_set((TokenType.ROW, TokenType.ROWS))
        by_expressions = self._parse_limit_by()

        offset = exp.Offset(this=this, expression=count, expressions=by_expressions)
        return self.expression(offset)

    return this
def _parse_locks(self) -> list[exp.Lock]:
    """Parse consecutive locking clauses into a list of `exp.Lock` nodes.

    Each clause may carry an `OF <tables>` list and a wait policy
    (`NOWAIT`, `WAIT <expr>` or `SKIP LOCKED`). The list is empty when no
    locking clause is present.
    """
    # (keyword sequence, update, key), attempted in this order
    lock_modes = (
        (("FOR", "UPDATE"), True, None),
        (("FOR", "SHARE"), False, None),
        (("LOCK", "IN", "SHARE", "MODE"), False, None),
        (("FOR", "KEY", "SHARE"), False, True),
        (("FOR", "NO", "KEY", "UPDATE"), True, True),
    )

    locks = []
    while True:
        for keywords, update, key in lock_modes:
            if self._match_text_seq(*keywords):
                break
        else:
            # No further locking clause
            return locks

        if self._match_text_seq("OF"):
            expressions = self._parse_csv(lambda: self._parse_table(schema=True))
        else:
            expressions = None

        wait: bool | exp.Expr | None
        if self._match_text_seq("NOWAIT"):
            wait = True
        elif self._match_text_seq("WAIT"):
            wait = self._parse_primary()
        elif self._match_text_seq("SKIP", "LOCKED"):
            wait = False
        else:
            wait = None

        lock = exp.Lock(update=update, expressions=expressions, wait=wait, key=key)
        locks.append(self.expression(lock))
def _parse_set_operations(self, this: exp.Expr | None) -> exp.Expr | None:
    """Fold any chain of set operations that follows `this` into a single tree.

    When the result is a set operation and `MODIFIERS_ATTACHED_TO_SET_OP` is
    set, the modifiers named in `SET_OP_MODIFIERS` are moved from its
    right-hand operand onto the set operation itself.
    """
    while this and (setop := self.parse_set_operation(this)):
        this = setop

    if not (isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP):
        return this

    right_operand = this.expression
    if right_operand:
        for modifier_name in self.SET_OP_MODIFIERS:
            modifier = right_operand.args.get(modifier_name)
            if modifier:
                this.set(modifier_name, modifier.pop())

    return this
_parse_comparison(self) -> exp.Expr | None: 5764 this = self._parse_range() 5765 while self._match_set(self.COMPARISON): 5766 comments = self._prev_comments 5767 this = self.expression( 5768 self.COMPARISON[self._prev.token_type](this=this, expression=self._parse_range()), 5769 comments=comments, 5770 ) 5771 return this 5772 5773 def _parse_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5774 this = this or self._parse_bitwise() 5775 negate = self._match(TokenType.NOT) 5776 5777 if self._match_set(self.RANGE_PARSERS): 5778 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 5779 if not expression: 5780 return this 5781 5782 this = expression 5783 elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)): 5784 this = self.expression(exp.Is(this=this, expression=exp.Null())) 5785 5786 # Postgres supports ISNULL and NOTNULL for conditions. 5787 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5788 if self._match(TokenType.NOTNULL): 5789 this = self.expression(exp.Is(this=this, expression=exp.Null())) 5790 this = self.expression(exp.Not(this=this)) 5791 5792 if negate: 5793 this = self._negate_range(this) 5794 5795 if self._match(TokenType.IS): 5796 this = self._parse_is(this) 5797 5798 return this 5799 5800 def _negate_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5801 if not this: 5802 return this 5803 5804 expression = this.this if isinstance(this, exp.Escape) else this 5805 if isinstance(expression, (exp.Like, exp.ILike)): 5806 expression.set("negate", True) 5807 return this 5808 5809 return self.expression(exp.Not(this=this)) 5810 5811 def _parse_is(self, this: exp.Expr | None) -> exp.Expr | None: 5812 index = self._index - 1 5813 negate = self._match(TokenType.NOT) 5814 5815 if self._match_text_seq("DISTINCT", "FROM"): 5816 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5817 return self.expression(klass(this=this, expression=self._parse_bitwise())) 5818 5819 if 
self._match(TokenType.JSON): 5820 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5821 5822 if self._match_text_seq("WITH"): 5823 _with = True 5824 elif self._match_text_seq("WITHOUT"): 5825 _with = False 5826 else: 5827 _with = None 5828 5829 unique = self._match(TokenType.UNIQUE) 5830 self._match_text_seq("KEYS") 5831 expression: exp.Expr | None = self.expression( 5832 exp.JSON(this=kind, with_=_with, unique=unique) 5833 ) 5834 else: 5835 expression = self._parse_null() or self._parse_bitwise() 5836 if not expression: 5837 self._retreat(index) 5838 return None 5839 5840 this = self.expression(exp.Is(this=this, expression=expression)) 5841 this = self.expression(exp.Not(this=this)) if negate else this 5842 return self._parse_column_ops(this) 5843 5844 def _parse_in(self, this: exp.Expr | None, alias: bool = False) -> exp.In: 5845 unnest = self._parse_unnest(with_alias=False) 5846 if unnest: 5847 this = self.expression(exp.In(this=this, unnest=unnest)) 5848 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5849 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5850 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5851 5852 if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query): 5853 this = self.expression( 5854 exp.In(this=this, query=self._parse_query_modifiers(query).subquery(copy=False)) 5855 ) 5856 else: 5857 this = self.expression(exp.In(this=this, expressions=expressions)) 5858 5859 if matched_l_paren: 5860 self._match_r_paren(this) 5861 elif not self._match(TokenType.R_BRACKET, expression=this): 5862 self.raise_error("Expecting ]") 5863 else: 5864 this = self.expression(exp.In(this=this, field=self._parse_column())) 5865 5866 return this 5867 5868 def _parse_between(self, this: exp.Expr | None) -> exp.Between: 5869 symmetric = None 5870 if self._match_text_seq("SYMMETRIC"): 5871 symmetric = True 5872 elif self._match_text_seq("ASYMMETRIC"): 
5873 symmetric = False 5874 5875 low = self._parse_bitwise() 5876 self._match(TokenType.AND) 5877 high = self._parse_bitwise() 5878 5879 return self.expression(exp.Between(this=this, low=low, high=high, symmetric=symmetric)) 5880 5881 def _parse_escape(self, this: exp.Expr | None) -> exp.Expr | None: 5882 if not self._match(TokenType.ESCAPE): 5883 return this 5884 return self.expression( 5885 exp.Escape(this=this, expression=self._parse_string() or self._parse_null()) 5886 ) 5887 5888 def _parse_interval_span(self, this: exp.Expr) -> exp.Interval: 5889 # handle day-time format interval span with omitted units: 5890 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5891 interval_span_units_omitted = None 5892 if ( 5893 this 5894 and this.is_string 5895 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5896 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5897 ): 5898 index = self._index 5899 5900 # Var "TO" Var 5901 first_unit = self._parse_var(any_token=True, upper=True) 5902 second_unit = None 5903 if first_unit and self._match_text_seq("TO"): 5904 second_unit = self._parse_var(any_token=True, upper=True) 5905 5906 interval_span_units_omitted = not (first_unit and second_unit) 5907 5908 self._retreat(index) 5909 5910 if interval_span_units_omitted: 5911 unit = None 5912 else: 5913 unit = self._parse_function() 5914 if not unit and ( 5915 self._curr.token_type == TokenType.VAR 5916 or self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS 5917 ): 5918 unit = self._parse_var(any_token=True, upper=True) 5919 5920 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5921 # each INTERVAL expression into this canonical form so it's easy to transpile 5922 if this and this.is_number: 5923 this = exp.Literal.string(this.to_py()) 5924 elif this and this.is_string: 5925 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5926 if parts and unit: 5927 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5928 
unit = None 5929 self._retreat(self._index - 1) 5930 5931 if len(parts) == 1: 5932 this = exp.Literal.string(parts[0][0]) 5933 unit = self.expression(exp.Var(this=parts[0][1].upper())) 5934 5935 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5936 unit = self.expression( 5937 exp.IntervalSpan( 5938 this=unit, 5939 expression=self._parse_function() 5940 or self._parse_var(any_token=True, upper=True), 5941 ) 5942 ) 5943 5944 return self.expression(exp.Interval(this=this, unit=unit)) 5945 5946 def _parse_interval(self, require_interval: bool = True) -> exp.Add | exp.Interval | None: 5947 index = self._index 5948 5949 if not self._match(TokenType.INTERVAL) and require_interval: 5950 return None 5951 5952 if self._match(TokenType.STRING, advance=False): 5953 this = self._parse_primary() 5954 else: 5955 this = self._parse_term() 5956 5957 if not this or ( 5958 isinstance(this, exp.Column) 5959 and not this.table 5960 and not this.this.quoted 5961 and self._curr 5962 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5963 ): 5964 self._retreat(index) 5965 return None 5966 5967 interval = self._parse_interval_span(this) 5968 5969 index = self._index 5970 self._match(TokenType.PLUS) 5971 5972 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5973 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5974 return self.expression(exp.Add(this=interval, expression=self._parse_interval(False))) 5975 5976 self._retreat(index) 5977 return interval 5978 5979 def _parse_bitwise(self) -> exp.Expr | None: 5980 this = self._parse_term() 5981 5982 while True: 5983 if self._match_set(self.BITWISE): 5984 this = self.expression( 5985 self.BITWISE[self._prev.token_type](this=this, expression=self._parse_term()) 5986 ) 5987 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5988 this = self.expression( 5989 exp.DPipe( 5990 this=this, 5991 expression=self._parse_term(), 5992 safe=not self.dialect.STRICT_STRING_CONCAT, 5993 ) 5994 ) 5995 elif self._match(TokenType.DQMARK): 5996 this = self.expression( 5997 exp.Coalesce(this=this, expressions=ensure_list(self._parse_term())) 5998 ) 5999 elif self._match_pair(TokenType.LT, TokenType.LT): 6000 this = self.expression( 6001 exp.BitwiseLeftShift(this=this, expression=self._parse_term()) 6002 ) 6003 elif self._match_pair(TokenType.GT, TokenType.GT): 6004 this = self.expression( 6005 exp.BitwiseRightShift(this=this, expression=self._parse_term()) 6006 ) 6007 else: 6008 break 6009 6010 return this 6011 6012 def _parse_term(self) -> exp.Expr | None: 6013 this = self._parse_factor() 6014 6015 while self._match_set(self.TERM): 6016 klass = self.TERM[self._prev.token_type] 6017 comments = self._prev_comments 6018 expression = self._parse_factor() 6019 6020 this = self.expression(klass(this=this, expression=expression), comments=comments) 6021 6022 if isinstance(this, exp.Collate): 6023 expr = this.expression 6024 6025 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 6026 # fallback to Identifier / Var 6027 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 6028 ident = expr.this 6029 if isinstance(ident, exp.Identifier): 6030 this.set("expression", ident if 
ident.quoted else exp.var(ident.name)) 6031 6032 return this 6033 6034 def _parse_factor(self) -> exp.Expr | None: 6035 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 6036 this = self._parse_at_time_zone(parse_method()) 6037 6038 while self._match_set(self.FACTOR): 6039 klass = self.FACTOR[self._prev.token_type] 6040 comments = self._prev_comments 6041 expression = parse_method() 6042 6043 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 6044 self._retreat(self._index - 1) 6045 return this 6046 6047 this = self.expression(klass(this=this, expression=expression), comments=comments) 6048 6049 if isinstance(this, exp.Div): 6050 this.set("typed", self.dialect.TYPED_DIVISION) 6051 this.set("safe", self.dialect.SAFE_DIVISION) 6052 6053 return this 6054 6055 def _parse_exponent(self) -> exp.Expr | None: 6056 this = self._parse_unary() 6057 while self._match_set(self.EXPONENT): 6058 comments = self._prev_comments 6059 this = self.expression( 6060 self.EXPONENT[self._prev.token_type](this=this, expression=self._parse_unary()), 6061 comments=comments, 6062 ) 6063 return this 6064 6065 def _parse_unary(self) -> exp.Expr | None: 6066 if self._match_set(self.UNARY_PARSERS): 6067 return self.UNARY_PARSERS[self._prev.token_type](self) 6068 return self._parse_type() 6069 6070 def _parse_type( 6071 self, parse_interval: bool = True, fallback_to_identifier: bool = False 6072 ) -> exp.Expr | None: 6073 if not fallback_to_identifier and (atom := self._parse_atom()) is not None: 6074 return atom 6075 6076 if interval := parse_interval and self._parse_interval(): 6077 return self._parse_column_ops(interval) 6078 6079 index = self._index 6080 data_type = self._parse_types(check_func=True, allow_identifiers=False) 6081 6082 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 
6083 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 6084 if isinstance(data_type, exp.Cast): 6085 # This constructor can contain ops directly after it, for instance struct unnesting: 6086 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 6087 return self._parse_column_ops(data_type) 6088 6089 if data_type: 6090 index2 = self._index 6091 this = self._parse_primary() 6092 6093 if isinstance(this, exp.Literal): 6094 literal = this.name 6095 this = self._parse_column_ops(this) 6096 6097 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 6098 if parser: 6099 return parser(self, this, data_type) 6100 6101 if ( 6102 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 6103 and data_type.is_type(exp.DType.TIMESTAMP) 6104 and TIME_ZONE_RE.search(literal) 6105 ): 6106 data_type = exp.DType.TIMESTAMPTZ.into_expr() 6107 6108 return self.expression(exp.Cast(this=this, to=data_type)) 6109 6110 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 6111 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 6112 # 6113 # If the index difference here is greater than 1, that means the parser itself must have 6114 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 6115 # 6116 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 6117 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 6118 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 6119 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 6120 # 6121 # In these cases, we don't really want to return the converted type, but instead retreat 6122 # and try to parse a Column or Identifier in the section below. 
6123 if data_type.expressions and index2 - index > 1: 6124 self._retreat(index2) 6125 return self._parse_column_ops(data_type) 6126 6127 self._retreat(index) 6128 6129 if fallback_to_identifier: 6130 return self._parse_id_var() 6131 6132 return self._parse_column() 6133 6134 def _parse_type_size(self) -> exp.DataTypeParam | None: 6135 this = self._parse_type() 6136 if not this: 6137 return None 6138 6139 if isinstance(this, exp.Column) and not this.table: 6140 this = exp.var(this.name.upper()) 6141 6142 return self.expression( 6143 exp.DataTypeParam(this=this, expression=self._parse_var(any_token=True)) 6144 ) 6145 6146 def _parse_user_defined_type(self, identifier: exp.Identifier) -> exp.Expr | None: 6147 type_name = identifier.name 6148 6149 while self._match(TokenType.DOT): 6150 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 6151 6152 return exp.DataType.from_str(type_name, dialect=self.dialect, udt=True) 6153 6154 def _parse_types( 6155 self, 6156 check_func: bool = False, 6157 schema: bool = False, 6158 allow_identifiers: bool = True, 6159 with_collation: bool = False, 6160 ) -> exp.Expr | None: 6161 index = self._index 6162 this: exp.Expr | None = None 6163 6164 if self._match_set(self.TYPE_TOKENS): 6165 type_token = self._prev.token_type 6166 else: 6167 type_token = None 6168 identifier = allow_identifiers and self._parse_id_var( 6169 any_token=False, tokens=(TokenType.VAR,) 6170 ) 6171 if isinstance(identifier, exp.Identifier): 6172 try: 6173 tokens = self.dialect.tokenize(identifier.name) 6174 except TokenError: 6175 tokens = None 6176 6177 if tokens and (type_token := tokens[0].token_type) in self.TYPE_TOKENS: 6178 if len(tokens) > 1: 6179 return exp.DataType.from_str(identifier.name, dialect=self.dialect) 6180 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 6181 this = self._parse_user_defined_type(identifier) 6182 else: 6183 self._retreat(self._index - 1) 6184 return None 6185 else: 6186 return None 6187 6188 if type_token == 
TokenType.PSEUDO_TYPE: 6189 return self.expression(exp.PseudoType(this=self._prev.text.upper())) 6190 6191 if type_token == TokenType.OBJECT_IDENTIFIER: 6192 return self.expression(exp.ObjectIdentifier(this=self._prev.text.upper())) 6193 6194 # https://materialize.com/docs/sql/types/map/ 6195 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 6196 key_type = self._parse_types( 6197 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6198 ) 6199 if not self._match(TokenType.FARROW): 6200 self._retreat(index) 6201 return None 6202 6203 value_type = self._parse_types( 6204 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6205 ) 6206 if not self._match(TokenType.R_BRACKET): 6207 self._retreat(index) 6208 return None 6209 6210 return exp.DataType( 6211 this=exp.DType.MAP, 6212 expressions=[key_type, value_type], 6213 nested=True, 6214 ) 6215 6216 nested = type_token in self.NESTED_TYPE_TOKENS 6217 is_struct = type_token in self.STRUCT_TYPE_TOKENS 6218 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 6219 expressions = None 6220 maybe_func = False 6221 6222 if self._match(TokenType.L_PAREN): 6223 if is_struct: 6224 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 6225 elif nested: 6226 expressions = self._parse_csv( 6227 lambda: self._parse_types( 6228 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6229 ) 6230 ) 6231 if type_token == TokenType.NULLABLE and len(expressions) == 1: 6232 this = expressions[0] 6233 this.set("nullable", True) 6234 self._match_r_paren() 6235 return this 6236 elif type_token in self.ENUM_TYPE_TOKENS: 6237 expressions = self._parse_csv(self._parse_equality) 6238 elif type_token == TokenType.JSON: 6239 # ClickHouse JSON type supports arguments: JSON(col Type, SKIP col, param=value) 6240 # https://clickhouse.com/docs/sql-reference/data-types/newjson 6241 expressions = self._parse_csv(self._parse_json_type_arg) 6242 
elif is_aggregate: 6243 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 6244 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 6245 ) 6246 if not func_or_ident: 6247 return None 6248 expressions = [func_or_ident] 6249 if self._match(TokenType.COMMA): 6250 expressions.extend( 6251 self._parse_csv( 6252 lambda: self._parse_types( 6253 check_func=check_func, 6254 schema=schema, 6255 allow_identifiers=allow_identifiers, 6256 ) 6257 ) 6258 ) 6259 else: 6260 expressions = self._parse_csv(self._parse_type_size) 6261 6262 # https://docs.snowflake.com/en/sql-reference/data-types-vector 6263 if type_token == TokenType.VECTOR and len(expressions) == 2: 6264 expressions = self._parse_vector_expressions(expressions) 6265 6266 if not self._match(TokenType.R_PAREN): 6267 self._retreat(index) 6268 return None 6269 6270 maybe_func = True 6271 6272 values: list[exp.Expr] | None = None 6273 6274 if nested and self._match(TokenType.LT): 6275 if is_struct: 6276 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 6277 else: 6278 expressions = self._parse_csv( 6279 lambda: self._parse_types( 6280 check_func=check_func, 6281 schema=schema, 6282 allow_identifiers=allow_identifiers, 6283 with_collation=True, 6284 ) 6285 ) 6286 6287 if not self._match(TokenType.GT): 6288 self.raise_error("Expecting >") 6289 6290 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 6291 values = self._parse_csv(self._parse_disjunction) 6292 if not values and is_struct: 6293 values = None 6294 self._retreat(self._index - 1) 6295 else: 6296 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 6297 6298 if type_token in self.TIMESTAMPS: 6299 if self._match_text_seq("WITH", "TIME", "ZONE"): 6300 maybe_func = False 6301 tz_type = exp.DType.TIMETZ if type_token in self.TIMES else exp.DType.TIMESTAMPTZ 6302 this = exp.DataType(this=tz_type, expressions=expressions) 6303 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 6304 
maybe_func = False 6305 this = exp.DataType(this=exp.DType.TIMESTAMPLTZ, expressions=expressions) 6306 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 6307 maybe_func = False 6308 elif type_token == TokenType.INTERVAL: 6309 if self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS: 6310 unit = self._parse_var(upper=True) 6311 if self._match_text_seq("TO"): 6312 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 6313 6314 this = self.expression(exp.DataType(this=self.expression(exp.Interval(unit=unit)))) 6315 else: 6316 this = self.expression(exp.DataType(this=exp.DType.INTERVAL)) 6317 elif type_token == TokenType.VOID: 6318 this = exp.DataType(this=exp.DType.NULL) 6319 6320 if maybe_func and check_func: 6321 index2 = self._index 6322 peek = self._parse_string() 6323 6324 if not peek: 6325 self._retreat(index) 6326 return None 6327 6328 self._retreat(index2) 6329 6330 if not this: 6331 assert type_token is not None 6332 if self._match_text_seq("UNSIGNED"): 6333 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 6334 if not unsigned_type_token: 6335 self.raise_error(f"Cannot convert {type_token.name} to unsigned.") 6336 6337 type_token = unsigned_type_token or type_token 6338 6339 # NULLABLE without parentheses can be a column (Presto/Trino) 6340 if type_token == TokenType.NULLABLE and not expressions: 6341 self._retreat(index) 6342 return None 6343 6344 this = exp.DataType( 6345 this=exp.DType[type_token.name], 6346 expressions=expressions, 6347 nested=nested, 6348 ) 6349 6350 # Empty arrays/structs are allowed 6351 if values is not None: 6352 cls = exp.Struct if is_struct else exp.Array 6353 this = exp.cast(cls(expressions=values), this, copy=False) 6354 6355 elif expressions: 6356 this.set("expressions", expressions) 6357 6358 # https://materialize.com/docs/sql/types/list/#type-name 6359 while self._match(TokenType.LIST): 6360 this = exp.DataType(this=exp.DType.LIST, expressions=[this], nested=True) 6361 
6362 index = self._index 6363 6364 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 6365 matched_array = self._match(TokenType.ARRAY) 6366 6367 while self._curr: 6368 datatype_token = self._prev.token_type 6369 matched_l_bracket = self._match(TokenType.L_BRACKET) 6370 6371 if (not matched_l_bracket and not matched_array) or ( 6372 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 6373 ): 6374 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 6375 # not to be confused with the fixed size array parsing 6376 break 6377 6378 matched_array = False 6379 values = self._parse_csv(self._parse_disjunction) or None 6380 if ( 6381 values 6382 and not schema 6383 and ( 6384 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS 6385 or datatype_token == TokenType.ARRAY 6386 or not self._match(TokenType.R_BRACKET, advance=False) 6387 ) 6388 ): 6389 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 6390 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 6391 self._retreat(index) 6392 break 6393 6394 this = exp.DataType( 6395 this=exp.DType.ARRAY, expressions=[this], values=values, nested=True 6396 ) 6397 self._match(TokenType.R_BRACKET) 6398 6399 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DType): 6400 converter = self.TYPE_CONVERTERS.get(this.this) 6401 if converter: 6402 this = converter(t.cast(exp.DataType, this)) 6403 6404 if with_collation and isinstance(this, exp.DataType) and self._match(TokenType.COLLATE): 6405 this.set("collate", self._parse_identifier() or self._parse_column()) 6406 6407 return this 6408 6409 def _parse_json_type_arg(self) -> exp.Expr | None: 6410 """Parse a single argument to ClickHouse's JSON type.""" 6411 6412 # SKIP col or SKIP REGEXP 'pattern' 6413 if self._match_text_seq("SKIP"): 6414 regexp = self._match(TokenType.RLIKE) 6415 arg = self._parse_column() 6416 
if isinstance(arg, exp.Column): 6417 arg = arg.to_dot() 6418 return self.expression(exp.SkipJSONColumn(regexp=regexp, expression=arg)) 6419 6420 param_or_col = self._parse_column() 6421 if not isinstance(param_or_col, exp.Column): 6422 return None 6423 6424 # Parameter: name=value (e.g., max_dynamic_paths=2) 6425 if len(param_or_col.parts) == 1 and self._match(TokenType.EQ): 6426 param = param_or_col.name 6427 value = self._parse_primary() 6428 return self.expression(exp.EQ(this=exp.var(param), expression=value)) 6429 6430 # Column type hint: col_name Type 6431 col = param_or_col.to_dot() 6432 kind = self._parse_types(check_func=False, allow_identifiers=False) 6433 return self.expression(exp.ColumnDef(this=col, kind=kind)) 6434 6435 def _parse_vector_expressions(self, expressions: list[exp.Expr]) -> list[exp.Expr]: 6436 return [exp.DataType.from_str(expressions[0].name, dialect=self.dialect), *expressions[1:]] 6437 6438 def _parse_struct_types(self, type_required: bool = False) -> exp.Expr | None: 6439 index = self._index 6440 6441 if ( 6442 self._curr 6443 and self._next 6444 and self._curr.token_type in self.TYPE_TOKENS 6445 and self._next.token_type in self.TYPE_TOKENS 6446 ): 6447 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 6448 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 6449 this = self._parse_id_var() 6450 else: 6451 this = ( 6452 self._parse_type(parse_interval=False, fallback_to_identifier=True) 6453 or self._parse_id_var() 6454 ) 6455 6456 self._match(TokenType.COLON) 6457 6458 if ( 6459 type_required 6460 and not isinstance(this, exp.DataType) 6461 and not self._match_set(self.TYPE_TOKENS, advance=False) 6462 ): 6463 self._retreat(index) 6464 return self._parse_types() 6465 6466 return self._parse_column_def(this) 6467 6468 def _parse_at_time_zone(self, this: exp.Expr | None) -> exp.Expr | None: 6469 if not self._match_text_seq("AT", "TIME", "ZONE"): 6470 return this 6471 return self._parse_at_time_zone( 6472 self.expression(exp.AtTimeZone(this=this, zone=self._parse_unary())) 6473 ) 6474 6475 def _parse_atom(self) -> exp.Expr | None: 6476 if ( 6477 self._curr.token_type in self.IDENTIFIER_TOKENS 6478 and (column := self._parse_column()) is not None 6479 ): 6480 return column 6481 6482 token = self._curr 6483 token_type = token.token_type 6484 6485 if not (primary_parser := self.PRIMARY_PARSERS.get(token_type)): 6486 return None 6487 6488 next_type = self._next.token_type 6489 6490 if ( 6491 next_type in self.COLUMN_OPERATORS 6492 or next_type in self.COLUMN_POSTFIX_TOKENS 6493 or (token_type == TokenType.STRING and next_type == TokenType.STRING) 6494 ): 6495 return None 6496 6497 self._advance() 6498 return primary_parser(self, token) 6499 6500 def _parse_column(self) -> exp.Expr | None: 6501 column: exp.Expr | None = self._parse_column_parts_fast() 6502 if column is None: 6503 this = self._parse_column_reference() 6504 if not this: 6505 this = self._parse_bracket(this) 6506 column = self._parse_column_ops(this) if this else this 6507 6508 if column: 6509 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS: 6510 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 6511 if self.COLON_IS_VARIANT_EXTRACT: 6512 column = 
self._parse_colon_as_variant_extract(column) 6513 6514 return column 6515 6516 def _parse_column_parts_fast(self) -> exp.Column | exp.Dot | None: 6517 """Fast path for simple column and dot references (a, a.b, ...). 6518 6519 Greedily consumes VAR/IDENTIFIER tokens separated by DOTs, then checks 6520 that nothing complex follows. If it does, retreats and returns None so 6521 the slow path can handle it. For >4 parts, wraps in exp.Dot nodes. 6522 """ 6523 index = self._index 6524 parts: list[exp.Identifier] | None = None 6525 all_comments: list[str] | None = None 6526 6527 while self._match_set(self.IDENTIFIER_TOKENS): 6528 token = self._prev 6529 comments = self._prev_comments 6530 6531 if parts is None and token.text.upper() in self.NO_PAREN_FUNCTION_PARSERS: 6532 self._retreat(index) 6533 return None 6534 6535 has_dot = self._match(TokenType.DOT) 6536 curr_tt = self._curr.token_type 6537 6538 if not has_dot: 6539 if curr_tt in self.COLUMN_OPERATORS or curr_tt in self.COLUMN_POSTFIX_TOKENS: 6540 self._retreat(index) 6541 return None 6542 elif curr_tt not in self.IDENTIFIER_TOKENS: 6543 self._retreat(index) 6544 return None 6545 6546 if parts is None: 6547 parts = [] 6548 6549 if comments: 6550 if all_comments is None: 6551 all_comments = [] 6552 all_comments.extend(comments) 6553 self._prev_comments = [] 6554 6555 parts.append( 6556 self.expression( 6557 exp.Identifier( 6558 this=token.text, quoted=token.token_type == TokenType.IDENTIFIER 6559 ), 6560 token, 6561 ) 6562 ) 6563 6564 if not has_dot: 6565 break 6566 6567 if parts is None: 6568 return None 6569 6570 n = len(parts) 6571 6572 if n == 1: 6573 column: exp.Column | exp.Dot = exp.Column(this=parts[0]) 6574 elif n == 2: 6575 column = exp.Column(this=parts[1], table=parts[0]) 6576 elif n == 3: 6577 column = exp.Column(this=parts[2], table=parts[1], db=parts[0]) 6578 else: 6579 column = exp.Column(this=parts[3], table=parts[2], db=parts[1], catalog=parts[0]) 6580 6581 for i in range(4, n): 6582 column = 
exp.Dot(this=column, expression=parts[i]) 6583 6584 if all_comments: 6585 column.add_comments(all_comments) 6586 6587 return column 6588 6589 def _parse_column_reference(self) -> exp.Expr | None: 6590 this = self._parse_field() 6591 if ( 6592 not this 6593 and self._match(TokenType.VALUES, advance=False) 6594 and self.VALUES_FOLLOWED_BY_PAREN 6595 and (not self._next or self._next.token_type != TokenType.L_PAREN) 6596 ): 6597 this = self._parse_id_var() 6598 6599 if isinstance(this, exp.Identifier): 6600 # We bubble up comments from the Identifier to the Column 6601 this = self.expression(exp.Column(this=this), comments=this.pop_comments()) 6602 6603 return this 6604 6605 def _build_json_extract( 6606 self, 6607 this: exp.Expr | None, 6608 path_parts: list[exp.JSONPathPart], 6609 escape: bool | None, 6610 ) -> tuple[exp.Expr | None, list[exp.JSONPathPart]]: 6611 if len(path_parts) > 1: 6612 this = self.expression( 6613 exp.JSONExtract( 6614 this=this, 6615 expression=exp.JSONPath(expressions=path_parts, escape=escape), 6616 variant_extract=True, 6617 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 6618 ) 6619 ) 6620 path_parts = [exp.JSONPathRoot()] 6621 6622 return this, path_parts 6623 6624 def _parse_colon_as_variant_extract(self, this: exp.Expr | None) -> exp.Expr | None: 6625 path_parts: list[exp.JSONPathPart] = [exp.JSONPathRoot()] 6626 escape = None 6627 6628 while self._match(TokenType.COLON): 6629 key = self._parse_id_var(any_token=True, tokens=(TokenType.SELECT,)) 6630 6631 if key: 6632 if isinstance(key, exp.Identifier) and key.quoted: 6633 escape = True 6634 path_parts.append(exp.JSONPathKey(this=key.name)) 6635 6636 while True: 6637 if self._match(TokenType.DOT): 6638 next_key = self._parse_id_var(any_token=True, tokens=(TokenType.SELECT,)) 6639 6640 if next_key: 6641 if isinstance(next_key, exp.Identifier) and next_key.quoted: 6642 escape = True 6643 path_parts.append(exp.JSONPathKey(this=next_key.name)) 6644 elif 
self._match(TokenType.L_BRACKET): 6645 bracket_expr = self._parse_bracket_key_value() 6646 6647 if not self._match(TokenType.R_BRACKET): 6648 self.raise_error("Expected ]") 6649 6650 if bracket_expr: 6651 if bracket_expr.is_string: 6652 path_parts.append(exp.JSONPathKey(this=bracket_expr.name)) 6653 escape = True 6654 elif bracket_expr.is_star: 6655 path_parts.append(exp.JSONPathSubscript(this=exp.JSONPathWildcard())) 6656 elif bracket_expr.is_number: 6657 path_parts.append(exp.JSONPathSubscript(this=bracket_expr.to_py())) 6658 else: 6659 this, path_parts = self._build_json_extract(this, path_parts, escape) 6660 escape = None 6661 6662 this = self.expression( 6663 exp.Bracket( 6664 this=this, expressions=[bracket_expr], json_access=True 6665 ), 6666 ) 6667 6668 elif self._match(TokenType.DCOLON): 6669 this, path_parts = self._build_json_extract(this, path_parts, escape) 6670 escape = None 6671 6672 cast_type = self._parse_types() 6673 if cast_type: 6674 this = self.expression(exp.Cast(this=this, to=cast_type)) 6675 else: 6676 self.raise_error("Expected type after '::'") 6677 else: 6678 break 6679 6680 this, _ = self._build_json_extract(this, path_parts, escape) 6681 6682 return this 6683 6684 def _parse_dcolon(self) -> exp.Expr | None: 6685 return self._parse_types() 6686 6687 def _parse_column_ops(self, this: exp.Expr | None) -> exp.Expr | None: 6688 while self._curr.token_type in self.BRACKETS: 6689 this = self._parse_bracket(this) 6690 6691 column_operators = self.COLUMN_OPERATORS 6692 cast_column_operators = self.CAST_COLUMN_OPERATORS 6693 while self._curr: 6694 op_token = self._curr.token_type 6695 6696 if op_token not in column_operators: 6697 break 6698 op = column_operators[op_token] 6699 self._advance() 6700 6701 if op_token in cast_column_operators: 6702 field = self._parse_dcolon() 6703 if not field: 6704 self.raise_error("Expected type") 6705 elif op and self._curr: 6706 field = self._parse_column_reference() or self._parse_bitwise() 6707 if 
isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 6708 field = self._parse_column_ops(field) 6709 else: 6710 field = self._parse_field(any_token=True, anonymous_func=True) 6711 6712 # Function calls can be qualified, e.g., x.y.FOO() 6713 # This converts the final AST to a series of Dots leading to the function call 6714 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 6715 if isinstance(field, (exp.Func, exp.Window)) and this: 6716 this = this.transform( 6717 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 6718 ) 6719 6720 if op: 6721 this = op(self, this, field) 6722 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 6723 this = self.expression( 6724 exp.Column( 6725 this=field, 6726 table=this.this, 6727 db=this.args.get("table"), 6728 catalog=this.args.get("db"), 6729 ), 6730 comments=this.comments, 6731 ) 6732 elif isinstance(field, exp.Window): 6733 # Move the exp.Dot's to the window's function 6734 window_func = self.expression(exp.Dot(this=this, expression=field.this)) 6735 field.set("this", window_func) 6736 this = field 6737 else: 6738 this = self.expression(exp.Dot(this=this, expression=field)) 6739 6740 if field and field.comments: 6741 t.cast(exp.Expr, this).add_comments(field.pop_comments()) 6742 6743 this = self._parse_bracket(this) 6744 6745 return this 6746 6747 def _parse_paren(self) -> exp.Expr | None: 6748 if not self._match(TokenType.L_PAREN): 6749 return None 6750 6751 comments = self._prev_comments 6752 query = self._parse_select() 6753 6754 if query: 6755 expressions = [query] 6756 else: 6757 expressions = self._parse_expressions() 6758 6759 this = seq_get(expressions, 0) 6760 6761 if not this and self._match(TokenType.R_PAREN, advance=False): 6762 this = self.expression(exp.Tuple()) 6763 elif isinstance(this, exp.UNWRAPPED_QUERIES): 6764 this = self._parse_subquery(this=this, parse_alias=False) 6765 elif 
def _parse_primary(self) -> exp.Expr | None:
    """Parse a primary expression.

    Tries, in order: a token handled by PRIMARY_PARSERS, a DOT followed by a
    NUMBER (built as the number literal `0.<digits>`), and finally a
    parenthesized expression via `_parse_paren`.
    """
    if not self._match_set(self.PRIMARY_PARSERS):
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    matched = self._prev
    primary = self.PRIMARY_PARSERS[matched.token_type](self, matched)

    if matched.token_type != TokenType.STRING:
        return primary

    # Consecutive string tokens are collected and folded into a single Concat
    pieces = [primary]
    while self._match(TokenType.STRING):
        pieces.append(exp.Literal.string(self._prev.text))

    if len(pieces) == 1:
        return primary

    return self.expression(
        exp.Concat(expressions=pieces, coalesce=self.dialect.CONCAT_COALESCE)
    )
False, 6828 optional_parens: bool = True, 6829 any_token: bool = False, 6830 ) -> exp.Expr | None: 6831 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 6832 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 6833 fn_syntax = False 6834 if ( 6835 self._match(TokenType.L_BRACE, advance=False) 6836 and self._next 6837 and self._next.text.upper() == "FN" 6838 ): 6839 self._advance(2) 6840 fn_syntax = True 6841 6842 func = self._parse_function_call( 6843 functions=functions, 6844 anonymous=anonymous, 6845 optional_parens=optional_parens, 6846 any_token=any_token, 6847 ) 6848 6849 if fn_syntax: 6850 self._match(TokenType.R_BRACE) 6851 6852 return func 6853 6854 def _parse_function_args(self, alias: bool = False) -> list[exp.Expr]: 6855 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 6856 6857 def _parse_function_call( 6858 self, 6859 functions: dict[str, t.Callable] | None = None, 6860 anonymous: bool = False, 6861 optional_parens: bool = True, 6862 any_token: bool = False, 6863 ) -> exp.Expr | None: 6864 if not self._curr: 6865 return None 6866 6867 comments = self._curr.comments 6868 prev = self._prev 6869 token = self._curr 6870 token_type = self._curr.token_type 6871 this: str | exp.Expr = self._curr.text 6872 upper = self._curr.text.upper() 6873 6874 after_dot = prev.token_type == TokenType.DOT 6875 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 6876 if ( 6877 optional_parens 6878 and parser 6879 and token_type not in self.INVALID_FUNC_NAME_TOKENS 6880 and not after_dot 6881 ): 6882 self._advance() 6883 return self._parse_window(parser(self)) 6884 6885 if self._next.token_type != TokenType.L_PAREN: 6886 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS and not after_dot: 6887 self._advance() 6888 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]()) 6889 6890 return None 6891 6892 if any_token: 6893 if token_type in self.RESERVED_TOKENS: 6894 return None 6895 elif 
token_type not in self.FUNC_TOKENS: 6896 return None 6897 6898 self._advance(2) 6899 6900 parser = self.FUNCTION_PARSERS.get(upper) 6901 if parser and not anonymous: 6902 result = parser(self) 6903 else: 6904 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 6905 6906 if subquery_predicate: 6907 expr = None 6908 if self._curr.token_type in self.SUBQUERY_TOKENS: 6909 expr = self._parse_select() 6910 self._match_r_paren() 6911 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 6912 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 6913 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 6914 self._advance(-1) 6915 expr = self._parse_bitwise() 6916 6917 if expr: 6918 return self.expression(subquery_predicate(this=expr), comments=comments) 6919 6920 if functions is None: 6921 functions = self.FUNCTIONS 6922 6923 function = functions.get(upper) 6924 known_function = function and not anonymous 6925 6926 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 6927 args = self._parse_function_args(alias) 6928 6929 post_func_comments = self._curr.comments if self._curr else None 6930 if known_function and post_func_comments: 6931 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 6932 # call we'll construct it as exp.Anonymous, even if it's "known" 6933 if any( 6934 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 6935 for comment in post_func_comments 6936 ): 6937 known_function = False 6938 6939 if alias and known_function: 6940 args = self._kv_to_prop_eq(args) 6941 6942 if known_function: 6943 func_builder = t.cast(t.Callable, function) 6944 6945 # mypyc compiled functions don't have __code__, so we use 6946 # try/except to check if func_builder accepts 'dialect'. 
6947 try: 6948 func = func_builder(args) 6949 except TypeError: 6950 func = func_builder(args, dialect=self.dialect) 6951 6952 func = self.validate_expression(func, args) 6953 if self.dialect.PRESERVE_ORIGINAL_NAMES: 6954 func.meta["name"] = this 6955 6956 result = func 6957 else: 6958 if token_type == TokenType.IDENTIFIER: 6959 this = exp.Identifier(this=this, quoted=True).update_positions(token) 6960 6961 result = self.expression(exp.Anonymous(this=this, expressions=args)) 6962 6963 result = result.update_positions(token) 6964 6965 if isinstance(result, exp.Expr): 6966 result.add_comments(comments) 6967 6968 if parser: 6969 self._match(TokenType.R_PAREN, expression=result) 6970 else: 6971 self._match_r_paren(result) 6972 return self._parse_window(result) 6973 6974 def _to_prop_eq(self, expression: exp.Expr, index: int) -> exp.Expr: 6975 return expression 6976 6977 def _kv_to_prop_eq( 6978 self, expressions: list[exp.Expr], parse_map: bool = False 6979 ) -> list[exp.Expr]: 6980 transformed = [] 6981 6982 for index, e in enumerate(expressions): 6983 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6984 if isinstance(e, exp.Alias): 6985 e = self.expression(exp.PropertyEQ(this=e.args.get("alias"), expression=e.this)) 6986 6987 if not isinstance(e, exp.PropertyEQ): 6988 e = self.expression( 6989 exp.PropertyEQ( 6990 this=e.this if parse_map else exp.to_identifier(e.this.name), 6991 expression=e.expression, 6992 ) 6993 ) 6994 6995 if isinstance(e.this, exp.Column): 6996 e.this.replace(e.this.this) 6997 else: 6998 e = self._to_prop_eq(e, index) 6999 7000 transformed.append(e) 7001 7002 return transformed 7003 7004 def _parse_function_properties(self) -> exp.Properties | None: 7005 return self._parse_properties() 7006 7007 def _parse_user_defined_function_expression(self) -> exp.Expr | None: 7008 return self._parse_statement() 7009 7010 def _parse_function_parameter(self) -> exp.Expr | None: 7011 return self._parse_column_def(this=self._parse_id_var(), 
def _parse_user_defined_function(self, kind: TokenType | None = None) -> exp.Expr | None:
    """Parse a user-defined function name with an optional parameter list.

    Without an opening parenthesis only the parsed name is returned. Otherwise the
    parameters are parsed with `_parse_function_parameter` and wrapped, together
    with the name, in a UserDefinedFunction. `kind` is not used by this
    implementation.
    """
    name = self._parse_table_parts(schema=True)

    if self._match(TokenType.L_PAREN):
        parameters = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction(this=name, expressions=parameters, wrapped=True)
        )

    return name
def _parse_schema(self, this: exp.Expr | None = None) -> exp.Expr | None:
    """Parse a parenthesized list of constraints / field definitions into a Schema.

    Returns `this` unchanged when no opening parenthesis follows, or when the
    parenthesis turns out to open a query rather than a schema.
    """
    start = self._index

    if not self._match(TokenType.L_PAREN):
        return this

    # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
    # expr can be of both types
    if self._match_set(self.SELECT_START_TOKENS):
        self._retreat(start)
        return this

    def _parse_entry() -> exp.Expr | None:
        return self._parse_constraint() or self._parse_field_def()

    entries = self._parse_csv(_parse_entry)
    self._match_r_paren()

    return self.expression(exp.Schema(this=this, expressions=entries))
def _parse_auto_increment(
    self,
) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
    """Parse the options that may follow an auto-increment keyword.

    Two forms are recognised: a wrapped list `(start, increment)` and
    `START <expr> [INCREMENT] <expr> [ORDER | NOORDER]`. When both a start and an
    increment were parsed, a GeneratedAsIdentityColumnConstraint is returned;
    otherwise a plain AutoIncrementColumnConstraint.
    """
    start = increment = order = None

    if self._match(TokenType.L_PAREN, advance=False):
        values = self._parse_wrapped_csv(self._parse_bitwise)
        start, increment = seq_get(values, 0), seq_get(values, 1)
    elif self._match_text_seq("START"):
        start = self._parse_bitwise()
        self._match_text_seq("INCREMENT")
        increment = self._parse_bitwise()

        if self._match_text_seq("ORDER"):
            order = True
        elif self._match_text_seq("NOORDER"):
            order = False

    if not (start and increment):
        return exp.AutoIncrementColumnConstraint()

    return exp.GeneratedAsIdentityColumnConstraint(
        start=start, increment=increment, this=False, order=order
    )
TokenType.NULL) 7247 this = self.expression( 7248 exp.GeneratedAsIdentityColumnConstraint(this=False, on_null=on_null) 7249 ) 7250 else: 7251 self._match_text_seq("ALWAYS") 7252 this = self.expression(exp.GeneratedAsIdentityColumnConstraint(this=True)) 7253 7254 self._match(TokenType.ALIAS) 7255 7256 if self._match_text_seq("ROW"): 7257 start = self._match_text_seq("START") 7258 if not start: 7259 self._match(TokenType.END) 7260 hidden = self._match_text_seq("HIDDEN") 7261 return self.expression(exp.GeneratedAsRowColumnConstraint(start=start, hidden=hidden)) 7262 7263 identity = self._match_text_seq("IDENTITY") 7264 7265 if self._match(TokenType.L_PAREN): 7266 if self._match(TokenType.START_WITH): 7267 this.set("start", self._parse_bitwise()) 7268 if self._match_text_seq("INCREMENT", "BY"): 7269 this.set("increment", self._parse_bitwise()) 7270 if self._match_text_seq("MINVALUE"): 7271 this.set("minvalue", self._parse_bitwise()) 7272 if self._match_text_seq("MAXVALUE"): 7273 this.set("maxvalue", self._parse_bitwise()) 7274 7275 if self._match_text_seq("CYCLE"): 7276 this.set("cycle", True) 7277 elif self._match_text_seq("NO", "CYCLE"): 7278 this.set("cycle", False) 7279 7280 if not identity: 7281 this.set("expression", self._parse_range()) 7282 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 7283 args = self._parse_csv(self._parse_bitwise) 7284 this.set("start", seq_get(args, 0)) 7285 this.set("increment", seq_get(args, 1)) 7286 7287 self._match_r_paren() 7288 7289 return this 7290 7291 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 7292 self._match_text_seq("LENGTH") 7293 return self.expression(exp.InlineLengthColumnConstraint(this=self._parse_bitwise())) 7294 7295 def _parse_not_constraint(self) -> exp.Expr | None: 7296 if self._match_text_seq("NULL"): 7297 return self.expression(exp.NotNullColumnConstraint()) 7298 if self._match_text_seq("CASESPECIFIC"): 7299 return 
self.expression(exp.CaseSpecificColumnConstraint(not_=True)) 7300 if self._match_text_seq("FOR", "REPLICATION"): 7301 return self.expression(exp.NotForReplicationColumnConstraint()) 7302 7303 # Unconsume the `NOT` token 7304 self._retreat(self._index - 1) 7305 return None 7306 7307 def _parse_column_constraint(self) -> exp.Expr | None: 7308 this = self._parse_id_var() if self._match(TokenType.CONSTRAINT) else None 7309 7310 procedure_option_follows = ( 7311 self._match(TokenType.WITH, advance=False) 7312 and self._next 7313 and self._next.text.upper() in self.PROCEDURE_OPTIONS 7314 ) 7315 7316 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 7317 constraint = self.CONSTRAINT_PARSERS[self._prev.text.upper()](self) 7318 if not constraint: 7319 self._retreat(self._index - 1) 7320 return None 7321 7322 return self.expression(exp.ColumnConstraint(this=this, kind=constraint)) 7323 7324 return this 7325 7326 def _parse_constraint(self) -> exp.Expr | None: 7327 if not self._match(TokenType.CONSTRAINT): 7328 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 7329 7330 return self.expression( 7331 exp.Constraint(this=self._parse_id_var(), expressions=self._parse_unnamed_constraints()) 7332 ) 7333 7334 def _parse_unnamed_constraints(self) -> list[exp.Expr]: 7335 constraints = [] 7336 while True: 7337 constraint = self._parse_unnamed_constraint() or self._parse_function() 7338 if not constraint: 7339 break 7340 constraints.append(constraint) 7341 7342 return constraints 7343 7344 def _parse_unnamed_constraint( 7345 self, constraints: t.Collection[str] | None = None 7346 ) -> exp.Expr | None: 7347 index = self._index 7348 7349 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 7350 constraints or self.CONSTRAINT_PARSERS 7351 ): 7352 return None 7353 7354 constraint_key = self._prev.text.upper() 7355 if constraint_key not in self.CONSTRAINT_PARSERS: 7356 self.raise_error(f"No parser found 
for schema constraint {constraint_key}.") 7357 7358 result = self.CONSTRAINT_PARSERS[constraint_key](self) 7359 if not result: 7360 self._retreat(index) 7361 7362 return result 7363 7364 def _parse_unique_key(self) -> exp.Expr | None: 7365 if ( 7366 self._curr 7367 and self._curr.token_type != TokenType.IDENTIFIER 7368 and self._curr.text.upper() in self.CONSTRAINT_PARSERS 7369 ): 7370 return None 7371 return self._parse_id_var(any_token=False) 7372 7373 def _parse_unique(self) -> exp.UniqueColumnConstraint: 7374 self._match_texts(("KEY", "INDEX")) 7375 return self.expression( 7376 exp.UniqueColumnConstraint( 7377 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 7378 this=self._parse_schema(self._parse_unique_key()), 7379 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 7380 on_conflict=self._parse_on_conflict(), 7381 options=self._parse_key_constraint_options(), 7382 ) 7383 ) 7384 7385 def _parse_key_constraint_options(self) -> list[str]: 7386 options = [] 7387 while True: 7388 if not self._curr: 7389 break 7390 7391 if self._match(TokenType.ON): 7392 action = None 7393 on = self._advance_any() and self._prev.text 7394 7395 if self._match_text_seq("NO", "ACTION"): 7396 action = "NO ACTION" 7397 elif self._match_text_seq("CASCADE"): 7398 action = "CASCADE" 7399 elif self._match_text_seq("RESTRICT"): 7400 action = "RESTRICT" 7401 elif self._match_pair(TokenType.SET, TokenType.NULL): 7402 action = "SET NULL" 7403 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 7404 action = "SET DEFAULT" 7405 else: 7406 self.raise_error("Invalid key constraint") 7407 7408 options.append(f"ON {on} {action}") 7409 else: 7410 var = self._parse_var_from_options( 7411 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 7412 ) 7413 if not var: 7414 break 7415 options.append(var.name) 7416 7417 return options 7418 7419 def _parse_references(self, match: bool = True) -> exp.Reference | None: 7420 if match and not 
def _parse_foreign_key(self) -> exp.ForeignKey:
    """Parse the body of a FOREIGN KEY constraint.

    Parses the optional wrapped column list (skipped when REFERENCES follows
    directly), the REFERENCES clause, any number of `ON DELETE | UPDATE <action>`
    clauses and the trailing key constraint options.

    Each action is stored under the lowercased event name (`delete` / `update`).
    `NO ACTION` and `SET NULL | DEFAULT` are recognised explicitly; any other
    single token is taken verbatim (uppercased) as the action.

    Errors are reported through `raise_error` when ON is not followed by DELETE or
    UPDATE, or when SET is not followed by NULL or DEFAULT.
    """
    expressions = (
        None
        if self._match(TokenType.REFERENCES, advance=False)
        else self._parse_wrapped_id_vars()
    )
    reference = self._parse_references()
    on_options: dict[str, str] = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")
            # NOTE(review): raise_error presumably returns instead of raising under a
            # lenient error level - confirm. In that case `_prev` is still the ON
            # token, so continuing would record the action under a bogus "on" key.
            break

        event = self._prev.text.lower()

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match(TokenType.SET):
            # SET must be followed by NULL or DEFAULT; previously a mismatch was
            # ignored and produced the meaningless action "SET SET".
            if not self._match_set((TokenType.NULL, TokenType.DEFAULT)):
                self.raise_error("Expected NULL or DEFAULT after SET")
                break

            action = "SET " + self._prev.text.upper()
        else:
            self._advance()
            action = self._prev.text.upper()

        on_options[event] = action

    return self.expression(
        exp.ForeignKey(
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,
        )
    )
def _parse_odbc_datetime_literal(self) -> exp.Expr:
    """
    Parses a datetime literal written in ODBC escape format. The leading VAR token
    selects the expression class through ODBC_DATETIME_LITERALS and the string that
    follows becomes its `this`, so for example `{d'yyyy-mm-dd'}` is parsed the same
    way as `DATE('yyyy-mm-dd')`. An error is reported if the closing brace is missing.

    Reference:
    https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
    """
    self._match(TokenType.VAR)

    literal_kind = self._prev.text.lower()
    node_type = self.ODBC_DATETIME_LITERALS[literal_kind]
    node = self.expression(node_type(this=self._parse_string()))

    if self._match(TokenType.R_BRACE):
        return node

    self.raise_error("Expected }")
    return node
def _parse_slice(self, this: exp.Expr | None) -> exp.Expr | None:
    """Parse the `:end[:step]` tail of a slice whose start is `this`.

    Returns `this` unchanged when no colon follows. A DASH immediately followed by
    a COLON is treated as the end bound -1; otherwise the end bound is parsed as
    an assignment-level expression. A further colon introduces the step.
    """
    if not self._match(TokenType.COLON):
        return this

    end: exp.Expr | None
    if not self._match_pair(TokenType.DASH, TokenType.COLON, advance=False):
        end = self._parse_assignment()
    else:
        # Consume a single token only; the colon is left for the step check below
        self._advance()
        end = -exp.Literal.number("1")

    step = None
    if self._match(TokenType.COLON):
        step = self._parse_unary()

    return self.expression(exp.Slice(this=this, expression=end, step=step))
self._match(TokenType.L_PAREN): 7642 args = self._parse_csv( 7643 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 7644 ) 7645 this = self.validate_expression(exp.If.from_arg_list(args), args) 7646 self._match_r_paren() 7647 else: 7648 index = self._index - 1 7649 7650 if self.NO_PAREN_IF_COMMANDS and index == 0: 7651 return self._parse_as_command(self._prev) 7652 7653 condition = self._parse_disjunction() 7654 7655 if not condition: 7656 self._retreat(index) 7657 return None 7658 7659 self._match(TokenType.THEN) 7660 true = self._parse_disjunction() 7661 false = self._parse_disjunction() if self._match(TokenType.ELSE) else None 7662 self._match(TokenType.END) 7663 this = self.expression(exp.If(this=condition, true=true, false=false)) 7664 7665 return this 7666 7667 def _parse_next_value_for(self) -> exp.Expr | None: 7668 if not self._match_text_seq("VALUE", "FOR"): 7669 self._retreat(self._index - 1) 7670 return None 7671 7672 return self.expression( 7673 exp.NextValueFor( 7674 this=self._parse_column(), 7675 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 7676 ) 7677 ) 7678 7679 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 7680 this = self._parse_function() or self._parse_var_or_string(upper=True) 7681 7682 if self._match(TokenType.FROM): 7683 return self.expression(exp.Extract(this=this, expression=self._parse_bitwise())) 7684 7685 if not self._match(TokenType.COMMA): 7686 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 7687 7688 return self.expression(exp.Extract(this=this, expression=self._parse_bitwise())) 7689 7690 def _parse_gap_fill(self) -> exp.GapFill: 7691 self._match(TokenType.TABLE) 7692 this = self._parse_table() 7693 7694 self._match(TokenType.COMMA) 7695 args = [this, *self._parse_csv(self._parse_lambda)] 7696 7697 gap_fill = exp.GapFill.from_arg_list(args) 7698 return self.validate_expression(gap_fill, args) 7699 7700 def _parse_char(self) -> exp.Chr: 7701 return 
self.expression( 7702 exp.Chr( 7703 expressions=self._parse_csv(self._parse_assignment), 7704 charset=self._match(TokenType.USING) and self._parse_charset_name(), 7705 ) 7706 ) 7707 7708 def _parse_charset_name(self) -> exp.Expr | None: 7709 """ 7710 Parse a charset name after USING or CHARACTER SET. Dialects that need to preserve quoting 7711 for specific name shapes override this. 7712 """ 7713 return self._parse_var( 7714 tokens={TokenType.BINARY, TokenType.IDENTIFIER}, 7715 ) 7716 7717 def _parse_cast(self, strict: bool, safe: bool | None = None) -> exp.Expr: 7718 this = self._parse_assignment() 7719 7720 if not self._match(TokenType.ALIAS): 7721 if self._match(TokenType.COMMA): 7722 return self.expression(exp.CastToStrType(this=this, to=self._parse_string())) 7723 7724 self.raise_error("Expected AS after CAST") 7725 7726 fmt = None 7727 to = self._parse_types(with_collation=True) 7728 7729 default = None 7730 if self._match(TokenType.DEFAULT): 7731 default = self._parse_bitwise() 7732 self._match_text_seq("ON", "CONVERSION", "ERROR") 7733 7734 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 7735 fmt_string = self._parse_wrapped(self._parse_string, optional=True) 7736 fmt = self._parse_at_time_zone(fmt_string) 7737 7738 if not to: 7739 to = exp.DType.UNKNOWN.into_expr() 7740 if to.this in exp.DataType.TEMPORAL_TYPES: 7741 this = self.expression( 7742 (exp.StrToDate if to.this == exp.DType.DATE else exp.StrToTime)( 7743 this=this, 7744 format=exp.Literal.string( 7745 format_time( 7746 fmt_string.this if fmt_string else "", 7747 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 7748 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 7749 ) 7750 ), 7751 safe=safe, 7752 ) 7753 ) 7754 7755 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 7756 this.set("zone", fmt.args["zone"]) 7757 return this 7758 elif not to: 7759 self.raise_error("Expected TYPE after CAST") 7760 elif isinstance(to, exp.Identifier): 7761 to = 
exp.DataType.from_str(to.name, dialect=self.dialect, udt=True) 7762 elif to.this == exp.DType.CHAR and self._match(TokenType.CHARACTER_SET): 7763 to = exp.DType.CHARACTER_SET.into_expr(kind=self._parse_var_or_string()) 7764 7765 return self.build_cast( 7766 strict=strict, 7767 this=this, 7768 to=to, 7769 format=fmt, 7770 safe=safe, 7771 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 7772 default=default, 7773 ) 7774 7775 def _parse_string_agg(self) -> exp.GroupConcat: 7776 if self._match(TokenType.DISTINCT): 7777 args: list[exp.Expr | None] = [ 7778 self.expression(exp.Distinct(expressions=[self._parse_disjunction()])) 7779 ] 7780 if self._match(TokenType.COMMA): 7781 args.extend(self._parse_csv(self._parse_disjunction)) 7782 else: 7783 args = self._parse_csv(self._parse_disjunction) # type: ignore 7784 7785 if self._match_text_seq("ON", "OVERFLOW"): 7786 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 7787 if self._match_text_seq("ERROR"): 7788 on_overflow: exp.Expr | None = exp.var("ERROR") 7789 else: 7790 self._match_text_seq("TRUNCATE") 7791 on_overflow = self.expression( 7792 exp.OverflowTruncateBehavior( 7793 this=self._parse_string(), 7794 with_count=( 7795 self._match_text_seq("WITH", "COUNT") 7796 or not self._match_text_seq("WITHOUT", "COUNT") 7797 ), 7798 ) 7799 ) 7800 else: 7801 on_overflow = None 7802 7803 index = self._index 7804 if not self._match(TokenType.R_PAREN) and args: 7805 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 7806 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 7807 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 7808 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 7809 return self.expression(exp.GroupConcat(this=args[0], separator=seq_get(args, 1))) 7810 7811 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 7812 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 7813 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 7814 if not self._match_text_seq("WITHIN", "GROUP"): 7815 self._retreat(index) 7816 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 7817 7818 # The corresponding match_r_paren will be called in parse_function (caller) 7819 self._match_l_paren() 7820 7821 return self.expression( 7822 exp.GroupConcat( 7823 this=self._parse_order(this=seq_get(args, 0)), 7824 separator=seq_get(args, 1), 7825 on_overflow=on_overflow, 7826 ) 7827 ) 7828 7829 def _parse_convert(self, strict: bool, safe: bool | None = None) -> exp.Expr | None: 7830 this = self._parse_bitwise() 7831 7832 if self._match(TokenType.USING): 7833 to: exp.Expr | None = exp.DType.CHARACTER_SET.into_expr(kind=self._parse_charset_name()) 7834 elif self._match(TokenType.COMMA): 7835 to = self._parse_types() 7836 else: 7837 to = None 7838 7839 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 7840 7841 def _parse_xml_element(self) -> exp.XMLElement: 7842 if self._match_text_seq("EVALNAME"): 7843 evalname = True 7844 this = self._parse_bitwise() 7845 else: 7846 evalname = None 7847 self._match_text_seq("NAME") 7848 this = self._parse_id_var() 7849 7850 return self.expression( 7851 exp.XMLElement( 7852 this=this, 7853 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_bitwise), 7854 evalname=evalname, 7855 ) 7856 ) 7857 7858 def _parse_xml_table(self) -> exp.XMLTable: 7859 
namespaces = None 7860 passing = None 7861 columns = None 7862 7863 if self._match_text_seq("XMLNAMESPACES", "("): 7864 namespaces = self._parse_xml_namespace() 7865 self._match_text_seq(")", ",") 7866 7867 this = self._parse_string() 7868 7869 if self._match_text_seq("PASSING"): 7870 # The BY VALUE keywords are optional and are provided for semantic clarity 7871 self._match_text_seq("BY", "VALUE") 7872 passing = self._parse_csv(self._parse_column) 7873 7874 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 7875 7876 if self._match_text_seq("COLUMNS"): 7877 columns = self._parse_csv(self._parse_field_def) 7878 7879 return self.expression( 7880 exp.XMLTable( 7881 this=this, namespaces=namespaces, passing=passing, columns=columns, by_ref=by_ref 7882 ) 7883 ) 7884 7885 def _parse_xml_namespace(self) -> list[exp.XMLNamespace]: 7886 namespaces = [] 7887 7888 while True: 7889 if self._match(TokenType.DEFAULT): 7890 uri = self._parse_string() 7891 else: 7892 uri = self._parse_alias(self._parse_string()) 7893 namespaces.append(self.expression(exp.XMLNamespace(this=uri))) 7894 if not self._match(TokenType.COMMA): 7895 break 7896 7897 return namespaces 7898 7899 def _parse_decode(self) -> exp.Decode | exp.DecodeCase | None: 7900 args = self._parse_csv(self._parse_disjunction) 7901 7902 if len(args) < 3: 7903 return self.expression(exp.Decode(this=seq_get(args, 0), charset=seq_get(args, 1))) 7904 7905 return self.expression(exp.DecodeCase(expressions=args)) 7906 7907 def _parse_json_key_value(self) -> exp.JSONKeyValue | None: 7908 self._match_text_seq("KEY") 7909 key = self._parse_column() 7910 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 7911 self._match_text_seq("VALUE") 7912 value = self._parse_bitwise() 7913 7914 if not key and not value: 7915 return None 7916 return self.expression(exp.JSONKeyValue(this=key, expression=value)) 7917 7918 def _parse_format_json(self, this: exp.Expr | None) -> exp.Expr | None: 7919 if not this or not 
self._match_text_seq("FORMAT", "JSON"): 7920 return this 7921 7922 return self.expression(exp.FormatJson(this=this)) 7923 7924 def _parse_on_condition(self) -> exp.OnCondition | None: 7925 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 7926 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 7927 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 7928 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 7929 else: 7930 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 7931 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 7932 7933 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 7934 7935 if not empty and not error and not null: 7936 return None 7937 7938 return self.expression(exp.OnCondition(empty=empty, error=error, null=null)) 7939 7940 def _parse_on_handling(self, on: str, *values: str) -> str | None | exp.Expr | None: 7941 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 7942 for value in values: 7943 if self._match_text_seq(value, "ON", on): 7944 return f"{value} ON {on}" 7945 7946 index = self._index 7947 if self._match(TokenType.DEFAULT): 7948 default_value = self._parse_bitwise() 7949 if self._match_text_seq("ON", on): 7950 return default_value 7951 7952 self._retreat(index) 7953 7954 return None 7955 7956 @t.overload 7957 def _parse_json_object(self, agg: t.Literal[False]) -> exp.JSONObject: ... 7958 7959 @t.overload 7960 def _parse_json_object(self, agg: t.Literal[True]) -> exp.JSONObjectAgg: ... 
7961 7962 def _parse_json_object(self, agg=False): 7963 star = self._parse_star() 7964 expressions = ( 7965 [star] 7966 if star 7967 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 7968 ) 7969 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 7970 7971 unique_keys = None 7972 if self._match_text_seq("WITH", "UNIQUE"): 7973 unique_keys = True 7974 elif self._match_text_seq("WITHOUT", "UNIQUE"): 7975 unique_keys = False 7976 7977 self._match_text_seq("KEYS") 7978 7979 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 7980 self._parse_type() 7981 ) 7982 encoding = self._match_text_seq("ENCODING") and self._parse_var() 7983 7984 return self.expression( 7985 (exp.JSONObjectAgg if agg else exp.JSONObject)( 7986 expressions=expressions, 7987 null_handling=null_handling, 7988 unique_keys=unique_keys, 7989 return_type=return_type, 7990 encoding=encoding, 7991 ) 7992 ) 7993 7994 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 7995 def _parse_json_column_def(self) -> exp.JSONColumnDef: 7996 if not self._match_text_seq("NESTED"): 7997 this = self._parse_id_var() 7998 ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY) 7999 kind = self._parse_types(allow_identifiers=False) 8000 nested = None 8001 else: 8002 this = None 8003 ordinality = None 8004 kind = None 8005 nested = True 8006 8007 format_json = self._match_text_seq("FORMAT", "JSON") 8008 path = self._match_text_seq("PATH") and self._parse_string() 8009 nested_schema = nested and self._parse_json_schema() 8010 8011 return self.expression( 8012 exp.JSONColumnDef( 8013 this=this, 8014 kind=kind, 8015 path=path, 8016 nested_schema=nested_schema, 8017 ordinality=ordinality, 8018 format_json=format_json, 8019 ) 8020 ) 8021 8022 def _parse_json_schema(self) -> exp.JSONSchema: 8023 self._match_text_seq("COLUMNS") 8024 return self.expression( 8025 exp.JSONSchema( 8026 
expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True) 8027 ) 8028 ) 8029 8030 def _parse_json_table(self) -> exp.JSONTable: 8031 this = self._parse_format_json(self._parse_bitwise()) 8032 path = self._match(TokenType.COMMA) and self._parse_string() 8033 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 8034 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 8035 schema = self._parse_json_schema() 8036 8037 return exp.JSONTable( 8038 this=this, 8039 schema=schema, 8040 path=path, 8041 error_handling=error_handling, 8042 empty_handling=empty_handling, 8043 ) 8044 8045 def _parse_match_against(self) -> exp.MatchAgainst: 8046 if self._match_text_seq("TABLE"): 8047 # parse SingleStore MATCH(TABLE ...) syntax 8048 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 8049 expressions = [] 8050 table = self._parse_table() 8051 if table: 8052 expressions = [table] 8053 else: 8054 expressions = self._parse_csv(self._parse_column) 8055 8056 self._match_text_seq(")", "AGAINST", "(") 8057 8058 this = self._parse_string() 8059 8060 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 8061 modifier = "IN NATURAL LANGUAGE MODE" 8062 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 8063 modifier = f"{modifier} WITH QUERY EXPANSION" 8064 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 8065 modifier = "IN BOOLEAN MODE" 8066 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 8067 modifier = "WITH QUERY EXPANSION" 8068 else: 8069 modifier = None 8070 8071 return self.expression( 8072 exp.MatchAgainst(this=this, expressions=expressions, modifier=modifier) 8073 ) 8074 8075 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 8076 def _parse_open_json(self) -> exp.OpenJSON: 8077 this = self._parse_bitwise() 8078 path = self._match(TokenType.COMMA) and self._parse_string() 8079 8080 def _parse_open_json_column_def() -> 
exp.OpenJSONColumnDef: 8081 this = self._parse_field(any_token=True) 8082 kind = self._parse_types() 8083 path = self._parse_string() 8084 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 8085 8086 return self.expression( 8087 exp.OpenJSONColumnDef(this=this, kind=kind, path=path, as_json=as_json) 8088 ) 8089 8090 expressions = None 8091 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 8092 self._match_l_paren() 8093 expressions = self._parse_csv(_parse_open_json_column_def) 8094 8095 return self.expression(exp.OpenJSON(this=this, path=path, expressions=expressions)) 8096 8097 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 8098 args = self._parse_csv(self._parse_bitwise) 8099 8100 if self._match(TokenType.IN): 8101 return self.expression( 8102 exp.StrPosition(this=self._parse_bitwise(), substr=seq_get(args, 0)) 8103 ) 8104 8105 if haystack_first: 8106 haystack = seq_get(args, 0) 8107 needle = seq_get(args, 1) 8108 else: 8109 haystack = seq_get(args, 1) 8110 needle = seq_get(args, 0) 8111 8112 return self.expression( 8113 exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 8114 ) 8115 8116 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 8117 args = self._parse_csv(self._parse_table) 8118 return exp.JoinHint(this=func_name.upper(), expressions=args) 8119 8120 def _parse_substring(self) -> exp.Substring: 8121 # Postgres supports the form: substring(string [from int] [for int]) 8122 # (despite being undocumented, the reverse order also works) 8123 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 8124 8125 args = t.cast(list[t.Optional[exp.Expr]], self._parse_csv(self._parse_bitwise)) 8126 8127 start, length = None, None 8128 8129 while self._curr: 8130 if self._match(TokenType.FROM): 8131 start = self._parse_bitwise() 8132 elif self._match(TokenType.FOR): 8133 if not start: 8134 start = exp.Literal.number(1) 8135 length = self._parse_bitwise() 8136 else: 8137 break 
8138 8139 if start: 8140 args.append(start) 8141 if length: 8142 args.append(length) 8143 8144 return self.validate_expression(exp.Substring.from_arg_list(args), args) 8145 8146 def _parse_trim(self) -> exp.Trim: 8147 # https://www.w3resource.com/sql/character-functions/trim.php 8148 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 8149 8150 position = None 8151 collation = None 8152 expression = None 8153 8154 if self._match_texts(self.TRIM_TYPES): 8155 position = self._prev.text.upper() 8156 8157 this = self._parse_bitwise() 8158 if self._match_set((TokenType.FROM, TokenType.COMMA)): 8159 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 8160 expression = self._parse_bitwise() 8161 8162 if invert_order: 8163 this, expression = expression, this 8164 8165 if self._match(TokenType.COLLATE): 8166 collation = self._parse_bitwise() 8167 8168 return self.expression( 8169 exp.Trim(this=this, position=position, expression=expression, collation=collation) 8170 ) 8171 8172 def _parse_window_clause(self) -> list[exp.Expr] | None: 8173 return self._parse_csv(self._parse_named_window) if self._match(TokenType.WINDOW) else None 8174 8175 def _parse_named_window(self) -> exp.Expr | None: 8176 return self._parse_window(self._parse_id_var(), alias=True) 8177 8178 def _parse_respect_or_ignore_nulls(self, this: exp.Expr | None) -> exp.Expr | None: 8179 if self._curr.token_type == TokenType.VAR: 8180 if self._match_text_seq("IGNORE", "NULLS"): 8181 return self.expression(exp.IgnoreNulls(this=this)) 8182 if self._match_text_seq("RESPECT", "NULLS"): 8183 return self.expression(exp.RespectNulls(this=this)) 8184 return this 8185 8186 def _parse_having_max(self, this: exp.Expr | None) -> exp.Expr | None: 8187 if self._match(TokenType.HAVING): 8188 self._match_texts(("MAX", "MIN")) 8189 max = self._prev.text.upper() != "MIN" 8190 return self.expression( 8191 exp.HavingMax(this=this, expression=self._parse_column(), max=max) 8192 ) 8193 8194 
return this 8195 8196 def _parse_window(self, this: exp.Expr | None, alias: bool = False) -> exp.Expr | None: 8197 func = this 8198 comments = func.comments if isinstance(func, exp.Expr) else None 8199 8200 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 8201 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 8202 if self._match_text_seq("WITHIN", "GROUP"): 8203 order = self._parse_wrapped(self._parse_order) 8204 this = self.expression(exp.WithinGroup(this=this, expression=order)) 8205 8206 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 8207 self._match(TokenType.WHERE) 8208 this = self.expression( 8209 exp.Filter(this=this, expression=self._parse_where(skip_where_token=True)) 8210 ) 8211 self._match_r_paren() 8212 8213 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 8214 # Some dialects choose to implement and some do not. 8215 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 8216 8217 # There is some code above in _parse_lambda that handles 8218 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 8219 8220 # The below changes handle 8221 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 8222 8223 # Oracle allows both formats 8224 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 8225 # and Snowflake chose to do the same for familiarity 8226 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 8227 if isinstance(this, exp.AggFunc): 8228 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 8229 8230 if ignore_respect and ignore_respect is not this: 8231 ignore_respect.replace(ignore_respect.this) 8232 this = self.expression(ignore_respect.__class__(this=this)) 8233 8234 this = self._parse_respect_or_ignore_nulls(this) 8235 8236 # bigquery select from window x AS (partition by ...) 
8237 if alias: 8238 over = None 8239 self._match(TokenType.ALIAS) 8240 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 8241 return this 8242 else: 8243 over = self._prev.text.upper() 8244 8245 if comments and isinstance(func, exp.Expr): 8246 func.pop_comments() 8247 8248 if not self._match(TokenType.L_PAREN): 8249 return self.expression( 8250 exp.Window(this=this, alias=self._parse_id_var(False), over=over), comments=comments 8251 ) 8252 8253 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 8254 8255 first: bool | None = True if self._match(TokenType.FIRST) else None 8256 if self._match_text_seq("LAST"): 8257 first = False 8258 8259 partition, order = self._parse_partition_and_order() 8260 kind = ( 8261 self._match_set((TokenType.ROWS, TokenType.RANGE)) or self._match_text_seq("GROUPS") 8262 ) and self._prev.text 8263 8264 if kind: 8265 self._match(TokenType.BETWEEN) 8266 start = self._parse_window_spec() 8267 8268 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 8269 exclude = ( 8270 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 8271 if self._match_text_seq("EXCLUDE") 8272 else None 8273 ) 8274 8275 spec = self.expression( 8276 exp.WindowSpec( 8277 kind=kind, 8278 start=start["value"], 8279 start_side=start["side"], 8280 end=end.get("value"), 8281 end_side=end.get("side"), 8282 exclude=exclude, 8283 ) 8284 ) 8285 else: 8286 spec = None 8287 8288 self._match_r_paren() 8289 8290 window = self.expression( 8291 exp.Window( 8292 this=this, 8293 partition_by=partition, 8294 order=order, 8295 spec=spec, 8296 alias=window_alias, 8297 over=over, 8298 first=first, 8299 ), 8300 comments=comments, 8301 ) 8302 8303 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
8304 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 8305 return self._parse_window(window, alias=alias) 8306 8307 return window 8308 8309 def _parse_partition_and_order( 8310 self, 8311 ) -> tuple[list[exp.Expr], exp.Expr | None]: 8312 return self._parse_partition_by(), self._parse_order() 8313 8314 def _parse_window_spec(self) -> dict[str, str | exp.Expr | None]: 8315 self._match(TokenType.BETWEEN) 8316 8317 return { 8318 "value": ( 8319 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 8320 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 8321 or self._parse_bitwise() 8322 ), 8323 "side": self._prev.text if self._match_texts(self.WINDOW_SIDES) else None, 8324 } 8325 8326 def _parse_alias(self, this: exp.Expr | None, explicit: bool = False) -> exp.Expr | None: 8327 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 8328 # so this section tries to parse the clause version and if it fails, it treats the token 8329 # as an identifier (alias) 8330 if self._can_parse_limit_or_offset(): 8331 return this 8332 8333 # WINDOW is in ID_VAR_TOKENS, so it can be consumed as an implicit alias. Detect the 8334 # named-window clause shape (`WINDOW <ident> AS (...)`) and avoid swallowing it. 
8335 if self._can_parse_named_window(): 8336 return this 8337 8338 any_token = self._match(TokenType.ALIAS) 8339 comments = self._prev_comments 8340 8341 if explicit and not any_token: 8342 return this 8343 8344 if self._match(TokenType.L_PAREN): 8345 aliases = self.expression( 8346 exp.Aliases( 8347 this=this, expressions=self._parse_csv(lambda: self._parse_id_var(any_token)) 8348 ), 8349 comments=comments, 8350 ) 8351 self._match_r_paren(aliases) 8352 return aliases 8353 8354 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 8355 self.STRING_ALIASES and self._parse_string_as_identifier() 8356 ) 8357 8358 if alias: 8359 comments.extend(alias.pop_comments()) 8360 this = self.expression(exp.Alias(this=this, alias=alias), comments=comments) 8361 column = this.this 8362 8363 # Moves the comment next to the alias in `expr /* comment */ AS alias` 8364 if not this.comments and column and column.comments: 8365 this.comments = column.pop_comments() 8366 8367 return this 8368 8369 def _parse_id_var( 8370 self, 8371 any_token: bool = True, 8372 tokens: t.Collection[TokenType] | None = None, 8373 ) -> exp.Expr | None: 8374 expression = self._parse_identifier() 8375 if not expression and ( 8376 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 8377 ): 8378 quoted = self._prev.token_type == TokenType.STRING 8379 expression = self._identifier_expression(quoted=quoted) 8380 8381 return expression 8382 8383 def _parse_string(self) -> exp.Expr | None: 8384 if self._match_set(self.STRING_PARSERS): 8385 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 8386 return self._parse_placeholder() 8387 8388 def _parse_string_as_identifier(self) -> exp.Identifier | None: 8389 if not self._match(TokenType.STRING): 8390 return None 8391 output = exp.to_identifier(self._prev.text, quoted=True) 8392 output.update_positions(self._prev) 8393 return output 8394 8395 def _parse_number(self) -> exp.Expr | None: 8396 if 
self._match_set(self.NUMERIC_PARSERS): 8397 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 8398 return self._parse_placeholder() 8399 8400 def _parse_identifier(self) -> exp.Expr | None: 8401 if self._match(TokenType.IDENTIFIER): 8402 return self._identifier_expression(quoted=True) 8403 return self._parse_placeholder() 8404 8405 def _parse_var( 8406 self, 8407 any_token: bool = False, 8408 tokens: t.Collection[TokenType] | None = None, 8409 upper: bool = False, 8410 ) -> exp.Expr | None: 8411 if ( 8412 (any_token and self._advance_any()) 8413 or self._match(TokenType.VAR) 8414 or (self._match_set(tokens) if tokens else False) 8415 ): 8416 return self.expression( 8417 exp.Var(this=self._prev.text.upper() if upper else self._prev.text) 8418 ) 8419 return self._parse_placeholder() 8420 8421 def _advance_any(self, ignore_reserved: bool = False) -> Token | None: 8422 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 8423 self._advance() 8424 return self._prev 8425 return None 8426 8427 def _parse_var_or_string(self, upper: bool = False) -> exp.Expr | None: 8428 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 8429 8430 def _parse_primary_or_var(self) -> exp.Expr | None: 8431 return self._parse_primary() or self._parse_var(any_token=True) 8432 8433 def _parse_null(self) -> exp.Expr | None: 8434 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 8435 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 8436 return self._parse_placeholder() 8437 8438 def _parse_boolean(self) -> exp.Expr | None: 8439 if self._match(TokenType.TRUE): 8440 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 8441 if self._match(TokenType.FALSE): 8442 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 8443 return self._parse_placeholder() 8444 8445 def _parse_star(self) -> exp.Expr | None: 8446 if self._match(TokenType.STAR): 8447 return 
self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 8448 return self._parse_placeholder() 8449 8450 def _parse_parameter(self) -> exp.Parameter: 8451 this = self._parse_identifier() or self._parse_primary_or_var() 8452 return self.expression(exp.Parameter(this=this)) 8453 8454 def _parse_placeholder(self) -> exp.Expr | None: 8455 if self._match_set(self.PLACEHOLDER_PARSERS): 8456 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 8457 if placeholder: 8458 return placeholder 8459 self._advance(-1) 8460 return None 8461 8462 def _parse_star_op(self, *keywords: str) -> list[exp.Expr] | None: 8463 if not self._match_texts(keywords): 8464 return None 8465 if self._match(TokenType.L_PAREN, advance=False): 8466 return self._parse_wrapped_csv(self._parse_expression) 8467 8468 expression = self._parse_alias(self._parse_disjunction(), explicit=True) 8469 return [expression] if expression else None 8470 8471 def _parse_csv( 8472 self, parse_method: t.Callable[[], T | None], sep: TokenType = TokenType.COMMA 8473 ) -> list[T]: 8474 parse_result = parse_method() 8475 items = [parse_result] if parse_result is not None else [] 8476 8477 while self._match(sep): 8478 if isinstance(parse_result, exp.Expr): 8479 self._add_comments(parse_result) 8480 parse_result = parse_method() 8481 if parse_result is not None: 8482 items.append(parse_result) 8483 8484 return items 8485 8486 def _parse_wrapped_id_vars(self, optional: bool = False) -> list[exp.Expr]: 8487 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 8488 8489 def _parse_wrapped_csv( 8490 self, 8491 parse_method: t.Callable[[], T | None], 8492 sep: TokenType = TokenType.COMMA, 8493 optional: bool = False, 8494 ) -> list[T]: 8495 return self._parse_wrapped( 8496 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 8497 ) 8498 8499 def _parse_wrapped(self, parse_method: t.Callable[[], T], optional: bool = False) -> T: 8500 wrapped = self._match(TokenType.L_PAREN) 8501 if not 
wrapped and not optional: 8502 self.raise_error("Expecting (") 8503 parse_result = parse_method() 8504 if wrapped: 8505 self._match_r_paren() 8506 return parse_result 8507 8508 def _parse_expressions(self) -> list[exp.Expr]: 8509 return self._parse_csv(self._parse_expression) 8510 8511 def _parse_select_or_expression(self, alias: bool = False) -> exp.Expr | None: 8512 return ( 8513 self._parse_set_operations( 8514 self._parse_alias(self._parse_assignment(), explicit=True) 8515 if alias 8516 else self._parse_assignment() 8517 ) 8518 or self._parse_select() 8519 ) 8520 8521 def _parse_ddl_select(self) -> exp.Expr | None: 8522 return self._parse_query_modifiers( 8523 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 8524 ) 8525 8526 def _parse_transaction(self) -> exp.Transaction | exp.Command: 8527 this = None 8528 if self._match_texts(self.TRANSACTION_KIND): 8529 this = self._prev.text 8530 8531 self._match_texts(("TRANSACTION", "WORK")) 8532 8533 modes = [] 8534 while True: 8535 mode = [] 8536 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 8537 mode.append(self._prev.text) 8538 8539 if mode: 8540 modes.append(" ".join(mode)) 8541 if not self._match(TokenType.COMMA): 8542 break 8543 8544 return self.expression(exp.Transaction(this=this, modes=modes)) 8545 8546 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 8547 chain = None 8548 savepoint = None 8549 is_rollback = self._prev.token_type == TokenType.ROLLBACK 8550 8551 self._match_texts(("TRANSACTION", "WORK")) 8552 8553 if self._match_text_seq("TO"): 8554 self._match_text_seq("SAVEPOINT") 8555 savepoint = self._parse_id_var() 8556 8557 if self._match(TokenType.AND): 8558 chain = not self._match_text_seq("NO") 8559 self._match_text_seq("CHAIN") 8560 8561 if is_rollback: 8562 return self.expression(exp.Rollback(savepoint=savepoint)) 8563 8564 return self.expression(exp.Commit(chain=chain)) 8565 8566 def _parse_refresh(self) -> exp.Refresh | 
exp.Command: 8567 if self._match(TokenType.TABLE): 8568 kind = "TABLE" 8569 elif self._match_text_seq("MATERIALIZED", "VIEW"): 8570 kind = "MATERIALIZED VIEW" 8571 else: 8572 kind = "" 8573 8574 this = self._parse_string() or self._parse_table() 8575 if not kind and not isinstance(this, exp.Literal): 8576 return self._parse_as_command(self._prev) 8577 8578 return self.expression(exp.Refresh(this=this, kind=kind)) 8579 8580 def _parse_column_def_with_exists(self): 8581 start = self._index 8582 self._match(TokenType.COLUMN) 8583 8584 exists_column = self._parse_exists(not_=True) 8585 expression = self._parse_field_def() 8586 8587 if not isinstance(expression, exp.ColumnDef): 8588 self._retreat(start) 8589 return None 8590 8591 expression.set("exists", exists_column) 8592 8593 return expression 8594 8595 def _parse_add_column(self) -> exp.ColumnDef | None: 8596 if not self._prev.text.upper() == "ADD": 8597 return None 8598 8599 return self._parse_column_def_with_exists() 8600 8601 def _parse_drop_column(self) -> exp.Drop | exp.Command | None: 8602 drop = self._parse_drop() if self._match(TokenType.DROP) else None 8603 if drop and not isinstance(drop, exp.Command): 8604 drop.set("kind", drop.args.get("kind", "COLUMN")) 8605 return drop 8606 8607 def _parse_alter_drop_action(self) -> exp.Expr | None: 8608 return self._parse_drop_column() 8609 8610 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 8611 def _parse_drop_partition(self, exists: bool | None = None) -> exp.DropPartition: 8612 return self.expression( 8613 exp.DropPartition(expressions=self._parse_csv(self._parse_partition), exists=exists) 8614 ) 8615 8616 def _parse_alter_table_add(self) -> list[exp.Expr]: 8617 def _parse_add_alteration() -> exp.Expr | None: 8618 self._match_text_seq("ADD") 8619 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 8620 return self.expression( 8621 exp.AddConstraint(expressions=self._parse_csv(self._parse_constraint)) 8622 ) 8623 8624 
def _parse_alter_table_alter(self) -> exp.Expr | None:
    """Parse the action following ALTER inside an ALTER statement.

    If the next token's text is a key of `ALTER_ALTER_PARSERS`, that parser handles
    the rest. Otherwise the input is treated as `ALTER [COLUMN] <field> ...` and one
    of the following is recognised, tried in this order:

    - DROP DEFAULT
    - SET DEFAULT <expression>
    - COMMENT <string>
    - DROP NOT NULL
    - SET NOT NULL
    - SET VISIBLE / SET INVISIBLE
    - [SET DATA] [TYPE] <type> [COLLATE <term>] [USING <expression>]

    Returns:
        The result of the dedicated parser, or an `exp.AlterColumn` node.
    """
    if self._match_texts(self.ALTER_ALTER_PARSERS):
        return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

    # Many dialects support the ALTER [COLUMN] syntax, so if there is no
    # keyword after ALTER we default to parsing this statement
    self._match(TokenType.COLUMN)
    column = self._parse_field(any_token=True)

    # The order of these checks is significant: the token-pair forms are tried
    # before the text-sequence forms that share their leading keyword.
    if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
        options = {"drop": True}
    elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
        options = {"default": self._parse_disjunction()}
    elif self._match(TokenType.COMMENT):
        options = {"comment": self._parse_string()}
    elif self._match_text_seq("DROP", "NOT", "NULL"):
        options = {"drop": True, "allow_null": True}
    elif self._match_text_seq("SET", "NOT", "NULL"):
        options = {"allow_null": False}
    elif self._match_text_seq("SET", "VISIBLE"):
        options = {"visible": "VISIBLE"}
    elif self._match_text_seq("SET", "INVISIBLE"):
        options = {"visible": "INVISIBLE"}
    else:
        # Fallback: a type change, where both leading keyword groups are optional.
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        options = {
            "dtype": self._parse_types(),
            "collate": self._match(TokenType.COLLATE) and self._parse_term(),
            "using": self._match(TokenType.USING) and self._parse_disjunction(),
        }

    return self.expression(exp.AlterColumn(this=column, **options))
advance=False) 8725 ): 8726 exists = self._parse_exists() 8727 old_column = self._parse_column() 8728 to = self._match_text_seq("TO") 8729 new_column = self._parse_column() 8730 8731 if old_column is None or not to or new_column is None: 8732 return None 8733 8734 return self.expression(exp.RenameColumn(this=old_column, to=new_column, exists=exists)) 8735 8736 self._match_text_seq("TO") 8737 return self.expression(exp.AlterRename(this=self._parse_table(schema=True))) 8738 8739 def _parse_alter_table_set(self) -> exp.AlterSet: 8740 alter_set = self.expression(exp.AlterSet()) 8741 8742 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 8743 "TABLE", "PROPERTIES" 8744 ): 8745 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 8746 elif self._match_text_seq("FILESTREAM_ON", advance=False): 8747 alter_set.set("expressions", [self._parse_assignment()]) 8748 elif self._match_texts(("LOGGED", "UNLOGGED")): 8749 alter_set.set("option", exp.var(self._prev.text.upper())) 8750 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 8751 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 8752 elif self._match_text_seq("LOCATION"): 8753 alter_set.set("location", self._parse_field()) 8754 elif self._match_text_seq("ACCESS", "METHOD"): 8755 alter_set.set("access_method", self._parse_field()) 8756 elif self._match_text_seq("TABLESPACE"): 8757 alter_set.set("tablespace", self._parse_field()) 8758 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 8759 alter_set.set("file_format", [self._parse_field()]) 8760 elif self._match_text_seq("STAGE_FILE_FORMAT"): 8761 alter_set.set("file_format", self._parse_wrapped_options()) 8762 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 8763 alter_set.set("copy_options", self._parse_wrapped_options()) 8764 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 8765 alter_set.set("tag", 
def _parse_alter_session(self) -> exp.AlterSession:
    """Parse ALTER SESSION SET/UNSET statements.

    After SET, a comma-separated list of items is parsed with
    `_parse_set_item_assignment`. Otherwise an optional UNSET keyword is consumed
    and each comma-separated identifier is wrapped in an `exp.SetItem`.

    Returns:
        An `exp.AlterSession` whose `unset` flag is False for the SET form and True
        otherwise.
    """

    def parse_unset_item() -> exp.Expr:
        return self.expression(exp.SetItem(this=self._parse_id_var(any_token=True)))

    unset = not self._match(TokenType.SET)
    if unset:
        self._match_text_seq("UNSET")
        items = self._parse_csv(parse_unset_item)
    else:
        items = self._parse_csv(self._parse_set_item_assignment)

    return self.expression(exp.AlterSession(expressions=items, unset=unset))
self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE") 8819 8820 if not self._curr and actions: 8821 return self.expression( 8822 exp.Alter( 8823 this=this, 8824 kind=alter_token.text.upper(), 8825 exists=exists, 8826 actions=actions, 8827 only=only, 8828 options=options, 8829 cluster=cluster, 8830 not_valid=not_valid, 8831 check=check, 8832 cascade=cascade, 8833 iceberg=iceberg, 8834 ) 8835 ) 8836 8837 return self._parse_as_command(start) 8838 8839 def _parse_analyze(self) -> exp.Analyze | exp.Command: 8840 start = self._prev 8841 # https://duckdb.org/docs/sql/statements/analyze 8842 if not self._curr: 8843 return self.expression(exp.Analyze()) 8844 8845 options = [] 8846 while self._match_texts(self.ANALYZE_STYLES): 8847 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 8848 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 8849 else: 8850 options.append(self._prev.text.upper()) 8851 8852 this: exp.Expr | None = None 8853 inner_expression: exp.Expr | None = None 8854 8855 kind = self._curr.text.upper() if self._curr else None 8856 8857 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 8858 this = self._parse_table_parts() 8859 elif self._match_text_seq("TABLES"): 8860 if self._match_set((TokenType.FROM, TokenType.IN)): 8861 kind = f"{kind} {self._prev.text.upper()}" 8862 this = self._parse_table(schema=True, is_db_reference=True) 8863 elif self._match_text_seq("DATABASE"): 8864 this = self._parse_table(schema=True, is_db_reference=True) 8865 elif self._match_text_seq("CLUSTER"): 8866 this = self._parse_table() 8867 # Try matching inner expr keywords before fallback to parse table. 
# https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
    """Parse the `[DELTA] STATISTICS ...` part of an ANALYZE statement.

    The upper-cased text of the previously consumed token becomes `kind`. After an
    optional DELTA and the STATISTICS keyword, one of these forms is recognised:

    - NOSCAN
    - FOR ALL COLUMNS
    - FOR COLUMNS <column>[, ...]
    - SAMPLE <number> [PERCENT]

    Returns:
        An `exp.AnalyzeStatistics` node.

    Raises:
        Reports "Expecting token STATISTICS" through `raise_error` when the keyword
        is missing.
    """
    this = None
    kind = self._prev.text.upper()
    option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
    expressions = []

    if not self._match_text_seq("STATISTICS"):
        self.raise_error("Expecting token STATISTICS")

    if self._match_text_seq("NOSCAN"):
        this = "NOSCAN"
    elif self._match(TokenType.FOR):
        if self._match_text_seq("ALL", "COLUMNS"):
            this = "FOR ALL COLUMNS"
        # Pass a tuple, as every other `_match_texts` call here does. A containment
        # check against the bare string "COLUMNS" would also accept any substring
        # of it, e.g. a token spelled COL.
        if self._match_texts(("COLUMNS",)):
            this = "FOR COLUMNS"
            expressions = self._parse_csv(self._parse_column_reference)
    elif self._match_text_seq("SAMPLE"):
        sample = self._parse_number()
        expressions = [
            self.expression(
                exp.AnalyzeSample(
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            )
        ]

    return self.expression(
        exp.AnalyzeStatistics(kind=kind, option=option, this=this, expressions=expressions)
    )
# https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
    """Parse the `HISTOGRAM ON ...` part of an ANALYZE statement.

    The upper-cased text of the previously consumed token is stored as `this`.
    After `HISTOGRAM ON`, a comma-separated column list is parsed, followed by any
    number of `WITH ...` clauses and then either `MANUAL|AUTO UPDATE` or
    `USING DATA <string>`.

    Returns:
        An `exp.AnalyzeHistogram` node.
    """
    this = self._prev.text.upper()
    columns = []
    payload: exp.Expr | None = None
    update_options = None

    if self._match_text_seq("HISTOGRAM", "ON"):
        columns = self._parse_csv(self._parse_column_reference)

        with_clauses = []
        while self._match(TokenType.WITH):
            # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
            if self._match_texts(("SYNC", "ASYNC")):
                # Recorded only when MODE follows; MODE itself is consumed afterwards.
                if self._match_text_seq("MODE", advance=False):
                    with_clauses.append(f"{self._prev.text.upper()} MODE")
                    self._advance()
                continue

            buckets = self._parse_number()
            if self._match_text_seq("BUCKETS"):
                with_clauses.append(f"{buckets} BUCKETS")

        if with_clauses:
            payload = self.expression(exp.AnalyzeWith(expressions=with_clauses))

        if self._match_texts(("MANUAL", "AUTO")) and self._match(
            TokenType.UPDATE, advance=False
        ):
            update_options = self._prev.text.upper()
            self._advance()
        elif self._match_text_seq("USING", "DATA"):
            payload = self.expression(exp.UsingData(this=self._parse_string()))

    return self.expression(
        exp.AnalyzeHistogram(
            this=this,
            expressions=columns,
            expression=payload,
            update_options=update_options,
        )
    )
whens=self._parse_when_matched(), 9033 returning=self._parse_returning(), 9034 ) 9035 ) 9036 9037 def _parse_when_matched(self) -> exp.Whens: 9038 whens = [] 9039 9040 while self._match(TokenType.WHEN): 9041 matched = not self._match(TokenType.NOT) 9042 self._match_text_seq("MATCHED") 9043 source = ( 9044 False 9045 if self._match_text_seq("BY", "TARGET") 9046 else self._match_text_seq("BY", "SOURCE") 9047 ) 9048 condition = self._parse_disjunction() if self._match(TokenType.AND) else None 9049 9050 self._match(TokenType.THEN) 9051 9052 if self._match(TokenType.INSERT): 9053 this = self._parse_star() 9054 if this: 9055 then: exp.Expr | None = self.expression(exp.Insert(this=this)) 9056 else: 9057 then = self.expression( 9058 exp.Insert( 9059 this=exp.var("ROW") 9060 if self._match_text_seq("ROW") 9061 else self._parse_value(values=False), 9062 expression=self._match_text_seq("VALUES") and self._parse_value(), 9063 where=self._parse_where(), 9064 ) 9065 ) 9066 elif self._match(TokenType.UPDATE): 9067 expressions = self._parse_star() 9068 if expressions: 9069 then = self.expression(exp.Update(expressions=expressions)) 9070 else: 9071 then = self.expression( 9072 exp.Update( 9073 expressions=self._match(TokenType.SET) 9074 and self._parse_csv(self._parse_equality), 9075 where=self._parse_where(), 9076 ) 9077 ) 9078 elif self._match(TokenType.DELETE): 9079 then = self.expression(exp.Var(this=self._prev.text)) 9080 else: 9081 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 9082 9083 whens.append( 9084 self.expression( 9085 exp.When(matched=matched, source=source, condition=condition, then=then) 9086 ) 9087 ) 9088 return self.expression(exp.Whens(expressions=whens)) 9089 9090 def _parse_show(self) -> exp.Expr | None: 9091 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 9092 if parser: 9093 return parser(self) 9094 return self._parse_as_command(self._prev) 9095 9096 def _parse_set_item_assignment(self, kind: str | None = None) -> exp.Expr | None: 
9097 index = self._index 9098 9099 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 9100 return self._parse_set_transaction(global_=kind == "GLOBAL") 9101 9102 left = self._parse_primary() or self._parse_column() 9103 assignment_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS) 9104 9105 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 9106 self._retreat(index) 9107 return None 9108 9109 right = self._parse_statement() or self._parse_id_var() 9110 if isinstance(right, (exp.Column, exp.Identifier)): 9111 right = exp.var(right.name) 9112 9113 this = self.expression(exp.EQ(this=left, expression=right)) 9114 return self.expression(exp.SetItem(this=this, kind=kind)) 9115 9116 def _parse_set_transaction(self, global_: bool = False) -> exp.Expr: 9117 self._match_text_seq("TRANSACTION") 9118 characteristics = self._parse_csv( 9119 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 9120 ) 9121 return self.expression( 9122 exp.SetItem(expressions=characteristics, kind="TRANSACTION", global_=global_) 9123 ) 9124 9125 def _parse_set_item(self) -> exp.Expr | None: 9126 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 9127 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 9128 9129 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 9130 index = self._index 9131 set_ = self.expression( 9132 exp.Set(expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag) 9133 ) 9134 9135 if self._curr: 9136 self._retreat(index) 9137 return self._parse_as_command(self._prev) 9138 9139 return set_ 9140 9141 def _parse_var_from_options( 9142 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 9143 ) -> exp.Var | None: 9144 start = self._curr 9145 if not start: 9146 return None 9147 9148 option = start.text.upper() 9149 continuations = options.get(option) 9150 9151 index = self._index 9152 self._advance() 9153 
for keywords in continuations or []: 9154 if isinstance(keywords, str): 9155 keywords = (keywords,) 9156 9157 if self._match_text_seq(*keywords): 9158 option = f"{option} {' '.join(keywords)}" 9159 break 9160 else: 9161 if continuations or continuations is None: 9162 if raise_unmatched: 9163 self.raise_error(f"Unknown option {option}") 9164 9165 self._retreat(index) 9166 return None 9167 9168 return exp.var(option) 9169 9170 def _parse_as_command(self, start: Token) -> exp.Command: 9171 while self._curr: 9172 self._advance() 9173 text = self._find_sql(start, self._prev) 9174 size = len(start.text) 9175 self._warn_unsupported() 9176 return exp.Command(this=text[:size], expression=text[size:]) 9177 9178 def _parse_dict_property(self, this: str) -> exp.DictProperty: 9179 settings = [] 9180 9181 self._match_l_paren() 9182 kind = self._parse_id_var() 9183 9184 if self._match(TokenType.L_PAREN): 9185 while True: 9186 key = self._parse_id_var() 9187 value = self._parse_function() or self._parse_primary_or_var() 9188 if not key and value is None: 9189 break 9190 settings.append(self.expression(exp.DictSubProperty(this=key, value=value))) 9191 self._match(TokenType.R_PAREN) 9192 9193 self._match_r_paren() 9194 9195 return self.expression( 9196 exp.DictProperty(this=this, kind=kind.this if kind else None, settings=settings) 9197 ) 9198 9199 def _parse_dict_range(self, this: str) -> exp.DictRange: 9200 self._match_l_paren() 9201 has_min = self._match_text_seq("MIN") 9202 if has_min: 9203 min = self._parse_var() or self._parse_primary() 9204 self._match_text_seq("MAX") 9205 max = self._parse_var() or self._parse_primary() 9206 else: 9207 max = self._parse_var() or self._parse_primary() 9208 min = exp.Literal.number(0) 9209 self._match_r_paren() 9210 return self.expression(exp.DictRange(this=this, min=min, max=max)) 9211 9212 def _parse_comprehension(self, this: exp.Expr | None) -> exp.Comprehension | None: 9213 index = self._index 9214 expression = self._parse_column() 9215 
position = self._match(TokenType.COMMA) and self._parse_column() 9216 9217 if not self._match(TokenType.IN): 9218 self._retreat(index - 1) 9219 return None 9220 iterator = self._parse_column() 9221 condition = self._parse_disjunction() if self._match_text_seq("IF") else None 9222 return self.expression( 9223 exp.Comprehension( 9224 this=this, 9225 expression=expression, 9226 position=position, 9227 iterator=iterator, 9228 condition=condition, 9229 ) 9230 ) 9231 9232 def _parse_heredoc(self) -> exp.Heredoc | None: 9233 if self._match(TokenType.HEREDOC_STRING): 9234 return self.expression(exp.Heredoc(this=self._prev.text)) 9235 9236 if not self._match_text_seq("$"): 9237 return None 9238 9239 tags = ["$"] 9240 tag_text = None 9241 9242 if self._is_connected(): 9243 self._advance() 9244 tags.append(self._prev.text.upper()) 9245 else: 9246 self.raise_error("No closing $ found") 9247 9248 if tags[-1] != "$": 9249 if self._is_connected() and self._match_text_seq("$"): 9250 tag_text = tags[-1] 9251 tags.append("$") 9252 else: 9253 self.raise_error("No closing $ found") 9254 9255 heredoc_start = self._curr 9256 9257 while self._curr: 9258 if self._match_text_seq(*tags, advance=False): 9259 this = self._find_sql(heredoc_start, self._prev) 9260 self._advance(len(tags)) 9261 return self.expression(exp.Heredoc(this=this, tag=tag_text)) 9262 9263 self._advance() 9264 9265 self.raise_error(f"No closing {''.join(tags)} found") 9266 return None 9267 9268 def _find_parser(self, parsers: dict[str, t.Callable], trie: dict) -> t.Callable | None: 9269 if not self._curr: 9270 return None 9271 9272 index = self._index 9273 this = [] 9274 while True: 9275 # The current token might be multiple words 9276 curr = self._curr.text.upper() 9277 key = curr.split(" ") 9278 this.append(curr) 9279 9280 self._advance() 9281 result, trie = in_trie(trie, key) 9282 if result == TrieResult.FAILED: 9283 break 9284 9285 if result == TrieResult.EXISTS: 9286 subparser = parsers[" ".join(this)] 9287 return 
subparser 9288 9289 self._retreat(index) 9290 return None 9291 9292 def _match_l_paren(self, expression: exp.Expr | None = None) -> None: 9293 if not self._match(TokenType.L_PAREN, expression=expression): 9294 self.raise_error("Expecting (") 9295 9296 def _match_r_paren(self, expression: exp.Expr | None = None) -> None: 9297 if not self._match(TokenType.R_PAREN, expression=expression): 9298 self.raise_error("Expecting )") 9299 9300 def _replace_lambda( 9301 self, node: exp.Expr | None, expressions: list[exp.Expr] 9302 ) -> exp.Expr | None: 9303 if not node: 9304 return node 9305 9306 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 9307 9308 for column in node.find_all(exp.Column): 9309 typ = lambda_types.get(column.parts[0].name) 9310 if typ is not None: 9311 dot_or_id = column.to_dot() if column.table else column.this 9312 9313 if typ: 9314 dot_or_id = self.expression(exp.Cast(this=dot_or_id, to=typ)) 9315 9316 parent = column.parent 9317 9318 while isinstance(parent, exp.Dot): 9319 if not isinstance(parent.parent, exp.Dot): 9320 parent.replace(dot_or_id) 9321 break 9322 parent = parent.parent 9323 else: 9324 if column is node: 9325 node = dot_or_id 9326 else: 9327 column.replace(dot_or_id) 9328 return node 9329 9330 def _parse_truncate_table(self) -> exp.TruncateTable | None | exp.Expr: 9331 start = self._prev 9332 9333 # Not to be confused with TRUNCATE(number, decimals) function call 9334 if self._match(TokenType.L_PAREN): 9335 self._retreat(self._index - 2) 9336 return self._parse_function() 9337 9338 # Clickhouse supports TRUNCATE DATABASE as well 9339 is_database = self._match(TokenType.DATABASE) 9340 9341 self._match(TokenType.TABLE) 9342 9343 exists = self._parse_exists(not_=False) 9344 9345 expressions = self._parse_csv( 9346 lambda: self._parse_table(schema=True, is_db_reference=is_database) 9347 ) 9348 9349 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 9350 9351 if self._match_text_seq("RESTART", 
"IDENTITY"): 9352 identity = "RESTART" 9353 elif self._match_text_seq("CONTINUE", "IDENTITY"): 9354 identity = "CONTINUE" 9355 else: 9356 identity = None 9357 9358 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 9359 option = self._prev.text 9360 else: 9361 option = None 9362 9363 partition = self._parse_partition() 9364 9365 # Fallback case 9366 if self._curr: 9367 return self._parse_as_command(start) 9368 9369 return self.expression( 9370 exp.TruncateTable( 9371 expressions=expressions, 9372 is_database=is_database, 9373 exists=exists, 9374 cluster=cluster, 9375 identity=identity, 9376 option=option, 9377 partition=partition, 9378 ) 9379 ) 9380 9381 def _parse_with_operator(self) -> exp.Expr | None: 9382 this = self._parse_ordered(self._parse_opclass) 9383 9384 if not self._match(TokenType.WITH): 9385 return this 9386 9387 op = self._parse_var(any_token=True, tokens=self.RESERVED_TOKENS) 9388 9389 return self.expression(exp.WithOperator(this=this, op=op)) 9390 9391 def _parse_wrapped_options(self) -> list[exp.Expr]: 9392 self._match(TokenType.EQ) 9393 self._match(TokenType.L_PAREN) 9394 9395 opts: list[exp.Expr] = [] 9396 option: exp.Expr | list[exp.Expr] | None 9397 while self._curr and not self._match(TokenType.R_PAREN): 9398 if self._match_text_seq("FORMAT_NAME", "="): 9399 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 9400 option = self._parse_format_name() 9401 else: 9402 option = self._parse_property() 9403 9404 if option is None: 9405 self.raise_error("Unable to parse option") 9406 break 9407 9408 opts.extend(ensure_list(option)) 9409 9410 return opts 9411 9412 def _parse_copy_parameters(self) -> list[exp.CopyParameter]: 9413 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 9414 9415 options = [] 9416 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 9417 option = self._parse_var(any_token=True) 9418 prev = self._prev.text.upper() 9419 9420 # Different dialects might 
def _parse_credentials(self) -> exp.Credentials | None:
    """Parse the optional credential clauses of a COPY-style statement.

    Each of STORAGE_INTEGRATION, CREDENTIALS, ENCRYPTION, IAM_ROLE and REGION is
    checked once, in that order, and stored on the node when present.

    Returns:
        An `exp.Credentials` node; it is returned even when no clause matched.
    """
    credentials = self.expression(exp.Credentials())

    if self._match_text_seq("STORAGE_INTEGRATION", "="):
        credentials.set("storage", self._parse_field())

    if self._match_text_seq("CREDENTIALS"):
        # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
        if self._match(TokenType.EQ):
            value = self._parse_wrapped_options()
        else:
            value = self._parse_field()
        credentials.set("credentials", value)

    if self._match_text_seq("ENCRYPTION"):
        credentials.set("encryption", self._parse_wrapped_options())

    if self._match_text_seq("IAM_ROLE"):
        if self._match(TokenType.DEFAULT):
            # The DEFAULT keyword is kept as a var built from the token's own text.
            role = exp.var(self._prev.text)
        else:
            role = self._parse_field()
        credentials.set("iam_role", role)

    if self._match_text_seq("REGION"):
        credentials.set("region", self._parse_field())

    return credentials
def _parse_ceil_floor(self, expr_type: type[TCeilFloor]) -> TCeilFloor:
    """Parse the arguments of a CEIL/FLOOR call.

    Args:
        expr_type: The expression class to instantiate.

    Returns:
        An `expr_type` built from the first parsed argument (`this`), the second
        one (`decimals`) and, when a TO keyword follows, the var parsed after it
        (`to`).
    """
    arguments = self._parse_csv(self._parse_lambda)
    value, precision = seq_get(arguments, 0), seq_get(arguments, 1)

    target = self._parse_var() if self._match_text_seq("TO") else None
    return expr_type(this=value, decimals=precision, to=target)
exp.Star( 9542 except_=self._parse_star_op("EXCEPT", "EXCLUDE"), 9543 replace=self._parse_star_op("REPLACE"), 9544 rename=self._parse_star_op("RENAME"), 9545 ) 9546 ).update_positions(star_token) 9547 9548 def _parse_grant_privilege(self) -> exp.GrantPrivilege | None: 9549 privilege_parts = [] 9550 9551 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 9552 # (end of privilege list) or L_PAREN (start of column list) are met 9553 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 9554 privilege_parts.append(self._curr.text.upper()) 9555 self._advance() 9556 9557 this = exp.var(" ".join(privilege_parts)) 9558 expressions = ( 9559 self._parse_wrapped_csv(self._parse_column) 9560 if self._match(TokenType.L_PAREN, advance=False) 9561 else None 9562 ) 9563 9564 return self.expression(exp.GrantPrivilege(this=this, expressions=expressions)) 9565 9566 def _parse_grant_principal(self) -> exp.GrantPrincipal | None: 9567 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 9568 principal = self._parse_id_var() 9569 9570 if not principal: 9571 return None 9572 9573 return self.expression(exp.GrantPrincipal(this=principal, kind=kind)) 9574 9575 def _parse_grant_revoke_common( 9576 self, 9577 ) -> tuple[list | None, str | None, exp.Expr | None]: 9578 privileges = self._parse_csv(self._parse_grant_privilege) 9579 9580 self._match(TokenType.ON) 9581 kind = self._prev.text.upper() if self._match_set(self.CREATABLES) else None 9582 9583 # Attempt to parse the securable e.g. 
MySQL allows names 9584 # such as "foo.*", "*.*" which are not easily parseable yet 9585 securable = self._try_parse(self._parse_table_parts) 9586 9587 return privileges, kind, securable 9588 9589 def _parse_grant(self) -> exp.Grant | exp.Command: 9590 start = self._prev 9591 9592 privileges, kind, securable = self._parse_grant_revoke_common() 9593 9594 if not securable or not self._match_text_seq("TO"): 9595 return self._parse_as_command(start) 9596 9597 principals = self._parse_csv(self._parse_grant_principal) 9598 9599 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 9600 9601 if self._curr: 9602 return self._parse_as_command(start) 9603 9604 return self.expression( 9605 exp.Grant( 9606 privileges=privileges, 9607 kind=kind, 9608 securable=securable, 9609 principals=principals, 9610 grant_option=grant_option, 9611 ) 9612 ) 9613 9614 def _parse_revoke(self) -> exp.Revoke | exp.Command: 9615 start = self._prev 9616 9617 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 9618 9619 privileges, kind, securable = self._parse_grant_revoke_common() 9620 9621 if not securable or not self._match_text_seq("FROM"): 9622 return self._parse_as_command(start) 9623 9624 principals = self._parse_csv(self._parse_grant_principal) 9625 9626 cascade = None 9627 if self._match_texts(("CASCADE", "RESTRICT")): 9628 cascade = self._prev.text.upper() 9629 9630 if self._curr: 9631 return self._parse_as_command(start) 9632 9633 return self.expression( 9634 exp.Revoke( 9635 privileges=privileges, 9636 kind=kind, 9637 securable=securable, 9638 principals=principals, 9639 grant_option=grant_option, 9640 cascade=cascade, 9641 ) 9642 ) 9643 9644 def _parse_overlay(self) -> exp.Overlay: 9645 def _parse_overlay_arg(text: str) -> exp.Expr | None: 9646 return ( 9647 self._parse_bitwise() 9648 if self._match(TokenType.COMMA) or self._match_text_seq(text) 9649 else None 9650 ) 9651 9652 return self.expression( 9653 exp.Overlay( 9654 this=self._parse_bitwise(), 9655 
expression=_parse_overlay_arg("PLACING"), 9656 from_=_parse_overlay_arg("FROM"), 9657 for_=_parse_overlay_arg("FOR"), 9658 ) 9659 ) 9660 9661 def _parse_format_name(self) -> exp.Property: 9662 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 9663 # for FILE_FORMAT = <format_name> 9664 return self.expression( 9665 exp.Property( 9666 this=exp.var("FORMAT_NAME"), value=self._parse_string() or self._parse_table_parts() 9667 ) 9668 ) 9669 9670 def _parse_max_min_by(self, expr_type: type[exp.AggFunc]) -> exp.AggFunc: 9671 args: list[exp.Expr] = [] 9672 9673 if self._match(TokenType.DISTINCT): 9674 args.append(self.expression(exp.Distinct(expressions=[self._parse_lambda()]))) 9675 self._match(TokenType.COMMA) 9676 9677 args.extend(self._parse_function_args()) 9678 9679 return self.expression( 9680 expr_type(this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)) 9681 ) 9682 9683 def _identifier_expression( 9684 self, token: Token | None = None, quoted: bool | None = None 9685 ) -> exp.Identifier: 9686 token = token or self._prev 9687 return self.expression(exp.Identifier(this=token.text, quoted=quoted), token) 9688 9689 def _build_pipe_cte( 9690 self, 9691 query: exp.Query, 9692 expressions: list[exp.Expr], 9693 alias_cte: exp.TableAlias | None = None, 9694 ) -> exp.Select: 9695 new_cte: str | exp.TableAlias | None 9696 if alias_cte: 9697 new_cte = alias_cte 9698 else: 9699 self._pipe_cte_counter += 1 9700 new_cte = f"__tmp{self._pipe_cte_counter}" 9701 9702 with_ = query.args.get("with_") 9703 ctes = with_.pop() if with_ else None 9704 9705 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 9706 if ctes: 9707 new_select.set("with_", ctes) 9708 9709 return new_select.with_(new_cte, as_=query, copy=False) 9710 9711 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 9712 select = self._parse_select(consume_pipe=False) 9713 if not select: 9714 return query 9715 9716 return 
self._build_pipe_cte( 9717 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 9718 ) 9719 9720 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 9721 limit = self._parse_limit() 9722 offset = self._parse_offset() 9723 if limit: 9724 curr_limit = query.args.get("limit", limit) 9725 if curr_limit.expression.to_py() >= limit.expression.to_py(): 9726 query.limit(limit, copy=False) 9727 if offset: 9728 curr_offset = query.args.get("offset") 9729 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 9730 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 9731 9732 return query 9733 9734 def _parse_pipe_syntax_aggregate_fields(self) -> exp.Expr | None: 9735 this = self._parse_disjunction() 9736 if self._match_text_seq("GROUP", "AND", advance=False): 9737 return this 9738 9739 this = self._parse_alias(this) 9740 9741 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 9742 return self._parse_ordered(lambda: this) 9743 9744 return this 9745 9746 def _parse_pipe_syntax_aggregate_group_order_by( 9747 self, query: exp.Select, group_by_exists: bool = True 9748 ) -> exp.Select: 9749 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 9750 aggregates_or_groups, orders = [], [] 9751 for element in expr: 9752 if isinstance(element, exp.Ordered): 9753 this = element.this 9754 if isinstance(this, exp.Alias): 9755 element.set("this", this.args["alias"]) 9756 orders.append(element) 9757 else: 9758 this = element 9759 aggregates_or_groups.append(this) 9760 9761 if group_by_exists: 9762 query.select(*aggregates_or_groups, copy=False).group_by( 9763 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 9764 copy=False, 9765 ) 9766 else: 9767 query.select(*aggregates_or_groups, append=False, copy=False) 9768 9769 if orders: 9770 return query.order_by(*orders, append=False, copy=False) 9771 9772 return query 9773 9774 def 
_parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 9775 self._match_text_seq("AGGREGATE") 9776 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 9777 9778 if self._match(TokenType.GROUP_BY) or ( 9779 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 9780 ): 9781 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 9782 9783 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9784 9785 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> exp.Query | None: 9786 first_setop = self.parse_set_operation(this=query) 9787 if not first_setop: 9788 return None 9789 9790 def _parse_and_unwrap_query() -> exp.Expr | None: 9791 expr = self._parse_paren() 9792 return expr.assert_is(exp.Subquery).unnest() if expr else None 9793 9794 first_setop.this.pop() 9795 9796 setops = [ 9797 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 9798 *self._parse_csv(_parse_and_unwrap_query), 9799 ] 9800 9801 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9802 with_ = query.args.get("with_") 9803 ctes = with_.pop() if with_ else None 9804 9805 if isinstance(first_setop, exp.Union): 9806 query = query.union(*setops, copy=False, **first_setop.args) 9807 elif isinstance(first_setop, exp.Except): 9808 query = query.except_(*setops, copy=False, **first_setop.args) 9809 else: 9810 query = query.intersect(*setops, copy=False, **first_setop.args) 9811 9812 query.set("with_", ctes) 9813 9814 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9815 9816 def _parse_pipe_syntax_join(self, query: exp.Query) -> exp.Query | None: 9817 join = self._parse_join() 9818 if not join: 9819 return None 9820 9821 if isinstance(query, exp.Select): 9822 return query.join(join, copy=False) 9823 9824 return query 9825 9826 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 9827 pivots = self._parse_pivots() 9828 if not pivots: 9829 return query 
9830 9831 from_ = query.args.get("from_") 9832 if from_: 9833 from_.this.set("pivots", pivots) 9834 9835 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9836 9837 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 9838 self._match_text_seq("EXTEND") 9839 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 9840 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9841 9842 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 9843 sample = self._parse_table_sample() 9844 9845 with_ = query.args.get("with_") 9846 if with_: 9847 with_.expressions[-1].this.set("sample", sample) 9848 else: 9849 query.set("sample", sample) 9850 9851 return query 9852 9853 def _parse_pipe_syntax_query(self, query: exp.Query) -> exp.Query | None: 9854 if isinstance(query, exp.Subquery): 9855 query = exp.select("*").from_(query, copy=False) 9856 9857 if not query.args.get("from_"): 9858 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 9859 9860 while self._match(TokenType.PIPE_GT): 9861 start_index = self._index 9862 start_text = self._curr.text.upper() 9863 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(start_text) 9864 if not parser: 9865 # The set operators (UNION, etc) and the JOIN operator have a few common starting 9866 # keywords, making it tricky to disambiguate them without lookahead. The approach 9867 # here is to try and parse a set operation and if that fails, then try to parse a 9868 # join operator. If that fails as well, then the operator is not supported. 
9869 parsed_query = self._parse_pipe_syntax_set_operator(query) 9870 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 9871 if not parsed_query: 9872 self._retreat(start_index) 9873 self.raise_error(f"Unsupported pipe syntax operator: '{start_text}'.") 9874 break 9875 query = parsed_query 9876 else: 9877 query = parser(self, query) 9878 9879 return query 9880 9881 def _parse_declareitem(self) -> exp.DeclareItem | None: 9882 self._match_texts(("VAR", "VARIABLE")) 9883 9884 vars = self._parse_csv(self._parse_id_var) 9885 if not vars: 9886 return None 9887 9888 self._match(TokenType.ALIAS) 9889 kind = self._parse_schema() if self._match(TokenType.TABLE) else self._parse_types() 9890 default = ( 9891 self._match(TokenType.DEFAULT) or self._match(TokenType.EQ) 9892 ) and self._parse_bitwise() 9893 9894 return self.expression(exp.DeclareItem(this=vars, kind=kind, default=default)) 9895 9896 def _parse_declare(self) -> exp.Declare | exp.Command: 9897 start = self._prev 9898 replace = self._match_text_seq("OR", "REPLACE") 9899 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 9900 9901 if not expressions or self._curr: 9902 return self._parse_as_command(start) 9903 9904 return self.expression(exp.Declare(expressions=expressions, replace=replace)) 9905 9906 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 9907 exp_class = exp.Cast if strict else exp.TryCast 9908 9909 if exp_class == exp.TryCast: 9910 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 9911 9912 return self.expression(exp_class(**kwargs)) 9913 9914 def _parse_json_value(self) -> exp.JSONValue: 9915 this = self._parse_bitwise() 9916 self._match(TokenType.COMMA) 9917 path = self._parse_bitwise() 9918 9919 returning = self._match(TokenType.RETURNING) and self._parse_type() 9920 9921 return self.expression( 9922 exp.JSONValue( 9923 this=this, 9924 path=self.dialect.to_json_path(path), 9925 returning=returning, 9926 
on_condition=self._parse_on_condition(), 9927 ) 9928 ) 9929 9930 def _parse_group_concat(self) -> exp.Expr | None: 9931 def concat_exprs(node: exp.Expr | None, exprs: list[exp.Expr]) -> exp.Expr: 9932 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 9933 concat_exprs = [ 9934 self.expression( 9935 exp.Concat( 9936 expressions=node.expressions, 9937 safe=True, 9938 coalesce=self.dialect.CONCAT_COALESCE, 9939 ) 9940 ) 9941 ] 9942 node.set("expressions", concat_exprs) 9943 return node 9944 if len(exprs) == 1: 9945 return exprs[0] 9946 return self.expression( 9947 exp.Concat(expressions=args, safe=True, coalesce=self.dialect.CONCAT_COALESCE) 9948 ) 9949 9950 args = self._parse_csv(self._parse_lambda) 9951 9952 if args: 9953 order = args[-1] if isinstance(args[-1], exp.Order) else None 9954 9955 if order: 9956 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 9957 # remove 'expr' from exp.Order and add it back to args 9958 args[-1] = order.this 9959 order.set("this", concat_exprs(order.this, args)) 9960 9961 this = order or concat_exprs(args[0], args) 9962 else: 9963 this = None 9964 9965 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 9966 9967 return self.expression(exp.GroupConcat(this=this, separator=separator)) 9968 9969 def _parse_initcap(self) -> exp.Initcap: 9970 expr = exp.Initcap.from_arg_list(self._parse_function_args()) 9971 9972 # attach dialect's default delimiters 9973 if expr.args.get("expression") is None: 9974 expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)) 9975 9976 return expr 9977 9978 def _parse_operator(self, this: exp.Expr | None) -> exp.Expr | None: 9979 while True: 9980 if not self._match(TokenType.L_PAREN): 9981 break 9982 9983 op = "" 9984 while self._curr and not self._match(TokenType.R_PAREN): 9985 op += self._curr.text 9986 self._advance() 9987 9988 comments = self._prev_comments 9989 this = self.expression( 
9990 exp.Operator(this=this, operator=op, expression=self._parse_bitwise()), 9991 comments=comments, 9992 ) 9993 9994 if not self._match(TokenType.OPERATOR): 9995 break 9996 9997 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- max_nodes: Maximum number of AST nodes to prevent memory exhaustion. Set to -1 (default) to disable the check.
def __init__(
    self,
    error_level: ErrorLevel | None = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    max_nodes: int = -1,
    dialect: DialectType = None,
):
    """
    Set up the parser configuration and an empty parsing state.

    Args:
        error_level: The desired error level. Falls back to ErrorLevel.IMMEDIATE when falsy.
        error_message_context: Number of characters of SQL context used when formatting
            an error message.
        max_errors: Maximum number of error messages included in a raised ParseError.
        max_nodes: Maximum number of AST nodes; -1 disables the check.
        dialect: The dialect to resolve and attach to this parser.
    """
    # Configuration supplied by the caller.
    self.error_level: ErrorLevel = error_level if error_level else ErrorLevel.IMMEDIATE
    self.error_message_context: int = error_message_context
    self.max_errors: int = max_errors
    self.max_nodes: int = max_nodes
    self.dialect: t.Any = _resolve_dialect(dialect)

    # Input being parsed and the errors collected so far.
    self.sql: str = ""
    self.errors: list[ParseError] = []

    # Token stream and cursor.
    self._tokens: list[Token] = []
    self._tokens_size: i64 = 0
    self._index: i64 = 0
    self._curr: Token = SENTINEL_NONE
    self._next: Token = SENTINEL_NONE
    self._prev: Token = SENTINEL_NONE
    self._prev_comments: list[str] = []

    # Counter used to name the temporary CTEs generated for pipe syntax.
    self._pipe_cte_counter: int = 0

    # Token chunks and the position within them.
    self._chunks: list[list[Token]] = []
    self._chunk_index: i64 = 0

    # Number of nodes validated so far, compared against max_nodes.
    self._node_count: int = 0
def reset(self) -> None:
    """Restore the parsing state to its initial, empty values (configuration is untouched)."""
    self.sql = ""

    # Each list attribute gets its own fresh list object.
    self.errors = []
    self._tokens = []
    self._prev_comments = []
    self._chunks = []

    # Cursor positions and sizes go back to zero.
    self._tokens_size = self._index = self._chunk_index = 0
    self._pipe_cte_counter = 0
    self._node_count = 0

    # No current, next or previous token.
    self._curr = self._next = self._prev = SENTINEL_NONE
def raise_error(self, message: str, token: Token = SENTINEL_NONE) -> None:
    """
    Build a ParseError for `message` and either raise it or record it.

    Args:
        message: The error description.
        token: The token the error refers to. When falsy, the current token, then the
            previous token, then an empty string token is used instead.

    Raises:
        ParseError: If the error level is ErrorLevel.IMMEDIATE. For any other level the
            error is appended to `self.errors`.
    """
    at = token or self._curr or self._prev or Token.string("")
    line, col = at.line, at.col

    highlighted = highlight_sql(
        sql=self.sql,
        positions=[(at.start, at.end)],
        context_length=self.error_message_context,
    )
    formatted_sql, start_context, highlight, end_context = highlighted

    error = ParseError.new(
        f"{message}. Line {line}, Col: {col}.\n {formatted_sql}",
        description=message,
        line=line,
        col=col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level != ErrorLevel.IMMEDIATE:
        # Deferred reporting: the error is kept for later inspection.
        self.errors.append(error)
        return

    raise error
def validate_expression(self, expression: E, args: list | None = None) -> E:
    """
    Count the node against the configured limit and report its validation errors.

    Args:
        expression: The expression to validate.
        args: Optional argument list forwarded to `expression.error_messages`.

    Returns:
        The same expression that was passed in.
    """
    node_limit = self.max_nodes
    if node_limit > -1:
        # A limit of -1 disables node counting.
        self._node_count += 1
        if self._node_count > node_limit:
            self.raise_error(f"Maximum number of AST nodes ({node_limit}) exceeded")

    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for problem in expression.error_messages(args):
        self.raise_error(problem)

    return expression
def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]:
    """
    Parse a token list into syntax trees, one per SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string.

    Returns:
        The list of the produced syntax trees.
    """
    # The unbound statement parser is looked up on the instance's class.
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string.
Returns:
The list of the produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: list[Token],
    sql: str | None = None,
) -> list[exp.Expr | None]:
    """
    Parses a list of tokens into a given Expr type. If a collection of Expr
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of syntax trees produced by the first expression type that parses
        successfully.

    Raises:
        TypeError: If one of the expression types has no registered parser.
        ParseError: If parsing fails for every expression type, or if no expression
            type was supplied at all.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(t.cast(type[exp.Expr], expression_type))
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Record which target type this failure belongs to before trying the next one.
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # With an empty collection of types there is no failure to chain from; indexing
    # `errors[-1]` unconditionally would surface an IndexError instead of the ParseError.
    cause = errors[-1] if errors else None
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from cause
Parses a list of tokens into a given Expr type. If a collection of Expr types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees produced by the first expression type that parses successfully.
def check_errors(self) -> None:
    """
    Report the collected errors according to the configured error level.

    With ErrorLevel.WARN every collected error is logged. With ErrorLevel.RAISE a single
    ParseError combining the collected errors is raised, provided there are any. Other
    levels do nothing here.
    """
    level = self.error_level

    if level == ErrorLevel.WARN:
        for problem in self.errors:
            logger.error(str(problem))
        return

    if level == ErrorLevel.RAISE and self.errors:
        # The combined message is limited to `max_errors` entries.
        summary = concat_messages(self.errors, self.max_errors)
        raise ParseError(summary, errors=merge_errors(self.errors))
Logs or raises any found errors, depending on the chosen error level setting.
def expression(
    self,
    instance: E,
    token: Token | None = None,
    comments: list[str] | None = None,
) -> E:
    """
    Finalize a freshly built expression: positions, comments and validation.

    Args:
        instance: The expression to finalize.
        token: When truthy, the token whose positions are copied onto the expression.
        comments: When truthy, the comments to attach; otherwise `self._add_comments`
            is asked to attach comments instead.

    Returns:
        The expression itself if it is primitive, otherwise the result of
        `self.validate_expression`.
    """
    if token:
        instance.update_positions(token)

    if comments:
        instance.add_comments(comments)
    else:
        self._add_comments(instance)

    # Primitive expressions skip validation.
    if instance.is_primitive:
        return instance

    return self.validate_expression(instance)
def parse_set_operation(
    self, this: exp.Expr | None, consume_pipe: bool = False
) -> exp.Expr | None:
    """
    Parse a set operation (UNION, EXCEPT or INTERSECT) whose left operand is `this`.

    Args:
        this: The expression used as the left operand of the set operation.
        consume_pipe: Forwarded to `_parse_select` when parsing the right operand.

    Returns:
        The set operation expression, or None (with the token position restored) when
        the upcoming tokens do not contain a set operation keyword.
    """
    rewind_to = self._index

    # Optional join-style side/kind modifiers may precede the set operation keyword.
    _, side_token, kind_token = self._parse_join_parts()
    side = side_token.text if side_token else None
    kind = kind_token.text if kind_token else None

    if not self._match_set(self.SET_OPERATIONS):
        self._retreat(rewind_to)
        return None

    matched = self._prev
    token_type = matched.token_type

    operation: type[exp.SetOperation]
    if token_type == TokenType.UNION:
        operation = exp.Union
    elif token_type == TokenType.EXCEPT:
        operation = exp.Except
    else:
        operation = exp.Intersect

    comments = matched.comments

    distinct: bool | None
    if self._match(TokenType.DISTINCT):
        distinct = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        # Neither keyword was given: fall back to the dialect's default for this operation.
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    name_match = self._match_text_seq("BY", "NAME")
    if not name_match:
        name_match = self._match_text_seq("STRICT", "CORRESPONDING")
    by_name = name_match if name_match else None

    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        if not side and not kind:
            kind = "INNER"

    on_column_list = (
        self._parse_wrapped_csv(self._parse_column)
        if by_name and self._match_texts(("ON", "BY"))
        else None
    )

    # The right operand is parsed without consuming further set operations.
    expression = self._parse_select(
        nested=True, parse_set_operation=False, consume_pipe=consume_pipe
    )

    node = operation(
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=expression,
        side=side,
        kind=kind,
        on=on_column_list,
    )
    return self.expression(node, comments=comments)