sqlglot.parser
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import (
    ErrorLevel,
    ParseError,
    TokenError,
    concat_messages,
    highlight_sql,
    merge_errors,
)
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be folded into LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be folded into Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])
        if expr_type is exp.JSONExtractScalar:
            expression.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY)

        return expression

    return _builder

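# Illustrative sketch, not part of the original module: the builders above turn
# a function's argument list into an AST node, optionally consulting the active
# dialect. For instance, assuming the default dialect reads the LOG base first
# (LOG_BASE_FIRST), a two-argument LOG becomes an exp.Log node:
#
#   >>> import sqlglot
#   >>> log = sqlglot.parse_one("SELECT LOG(2, 8)").find(sqlglot.exp.Log)
#   >>> isinstance(log, sqlglot.exp.Log)
#   True
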
expression.set("expressions", args[2:]) 107 if expr_type is exp.JSONExtractScalar: 108 expression.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY) 109 110 return expression 111 112 return _builder 113 114 115def build_mod(args: t.List) -> exp.Mod: 116 this = seq_get(args, 0) 117 expression = seq_get(args, 1) 118 119 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 120 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 121 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 122 123 return exp.Mod(this=this, expression=expression) 124 125 126def build_pad(args: t.List, is_left: bool = True): 127 return exp.Pad( 128 this=seq_get(args, 0), 129 expression=seq_get(args, 1), 130 fill_pattern=seq_get(args, 2), 131 is_left=is_left, 132 ) 133 134 135def build_array_constructor( 136 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 137) -> exp.Expression: 138 array_exp = exp_class(expressions=args) 139 140 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 141 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 142 143 return array_exp 144 145 146def build_convert_timezone( 147 args: t.List, default_source_tz: t.Optional[str] = None 148) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 149 if len(args) == 2: 150 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 151 return exp.ConvertTimezone( 152 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 153 ) 154 155 return exp.ConvertTimezone.from_arg_list(args) 156 157 158def build_trim(args: t.List, is_left: bool = True, reverse_args: bool = False): 159 this, expression = seq_get(args, 0), seq_get(args, 1) 160 161 if expression and reverse_args: 162 this, expression = expression, this 163 164 return exp.Trim(this=this, expression=expression, position="LEADING" if is_left else "TRAILING") 165 166 167def build_coalesce( 168 args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None 169) -> exp.Coalesce: 170 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null) 171 172 173def build_locate_strposition(args: t.List): 174 return exp.StrPosition( 175 this=seq_get(args, 1), 176 substr=seq_get(args, 0), 177 position=seq_get(args, 2), 178 ) 179 180 181def build_array_append(args: t.List, dialect: Dialect) -> exp.ArrayAppend: 182 """ 183 Builds ArrayAppend with NULL propagation semantics based on the dialect configuration. 184 185 Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. 186 Others (DuckDB, PostgreSQL) create a new single-element array instead. 187 188 Args: 189 args: Function arguments [array, element] 190 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 191 192 Returns: 193 ArrayAppend expression with appropriate null_propagation flag 194 """ 195 return exp.ArrayAppend( 196 this=seq_get(args, 0), 197 expression=seq_get(args, 1), 198 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 199 ) 200 201 202def build_array_prepend(args: t.List, dialect: Dialect) -> exp.ArrayPrepend: 203 """ 204 Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration. 205 206 Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. 207 Others (DuckDB, PostgreSQL) create a new single-element array instead. 
class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

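    # Illustrative note, not part of the original source: FUNCTIONS maps an
    # upper-cased function name to a builder. Builders that accept a `dialect`
    # argument receive the active dialect, which is how a single SQL spelling
    # can produce dialect-aware nodes. E.g. IFNULL is routed to build_coalesce:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT IFNULL(a, b)").find(sqlglot.exp.Coalesce) is not None
    #   True
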
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_APPEND": build_array_append,
        "ARRAY_CAT": build_array_concat,
        "ARRAY_CONCAT": build_array_concat,
        "ARRAY_PREPEND": build_array_prepend,
        "ARRAY_REMOVE": build_array_remove,
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GENERATE_UUID": lambda args, dialect: exp.Uuid(
            is_string=dialect.UUID_IS_STRING_TYPE or None
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "GREATEST": lambda args, dialect: exp.Greatest(
            this=seq_get(args, 0),
            expressions=args[1:],
            ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS,
        ),
        "LEAST": lambda args, dialect: exp.Least(
            this=seq_get(args, 0),
            expressions=args[1:],
            ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS,
        ),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_KEYS": lambda args, dialect: exp.JSONKeys(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        ),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None),
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
        TokenType.LOCALTIME: exp.Localtime,
        TokenType.LOCALTIMESTAMP: exp.Localtimestamp,
        TokenType.CURRENT_ROLE: exp.CurrentRole,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.FILE,
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.BIGNUM,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIME_NS,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.DECFLOAT,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.SESSION,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ANALYZE,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.INOUT,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.LOCK,
        TokenType.MATCH,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVER,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *ALTERABLES,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.CURRENT_CATALOG,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.LOCALTIME,
        TokenType.LOCALTIMESTAMP,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.SESSION_USER,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.UTC_DATE,
        TokenType.UTC_TIME,
        TokenType.UTC_TIMESTAMP,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

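    # Illustrative note, not part of the original source: the operator tables
    # above (CONJUNCTION through EXPONENT) drive precedence climbing, where each
    # level delegates to the next, tighter-binding one. That is why
    # multiplication binds tighter than addition:
    #
    #   >>> import sqlglot
    #   >>> node = sqlglot.parse_one("SELECT 1 + 2 * 3").selects[0]
    #   >>> isinstance(node, sqlglot.exp.Add), isinstance(node.expression, sqlglot.exp.Mul)
    #   (True, True)
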
    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_disjunction(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_disjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
            scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }

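    # Illustrative note, not part of the original source: COLUMN_OPERATORS are
    # parsed as postfix operators on a column, e.g. `x::int` produces a Cast
    # and, in dialects that tokenize it, `->` produces a JSONExtract:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT x::int").find(sqlglot.exp.Cast) is not None
    #   True
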
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()),
        exp.Condition: lambda self: self._parse_disjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.GrantPrincipal: lambda self: self._parse_grant_principal(),
        exp.GrantPrivilege: lambda self: self._parse_grant_privilege(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

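    # Illustrative note, not part of the original source: statements dispatch on
    # their leading token; token types absent from STATEMENT_PARSERS fall
    # through to the generic expression machinery:
    #
    #   >>> import sqlglot
    #   >>> type(sqlglot.parse_one("DROP TABLE foo")).__name__
    #   'Drop'
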
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDE: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(exp.RawString, token=token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(exp.National, token=token),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, token=token),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, token=token, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            token=token,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, token=token),
        TokenType.BYTE_STRING: lambda self, token: self.expression(
            exp.ByteString,
            token=token,
            is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None,
        ),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            token=token,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, token=token, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

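    # Illustrative note, not part of the original source: `?` yields a bare
    # exp.Placeholder, while the colon form keeps the name, provided the token
    # after `:` is in COLON_PLACEHOLDER_TOKENS. Assuming a dialect with colon
    # bind variables, such as Oracle:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT :foo", read="oracle").find(sqlglot.exp.Placeholder).name
    #   'foo'
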
    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
        TokenType.ADJACENT: binary_range_parser(exp.Adjacent),
        TokenType.OPERATOR: lambda self, this: self._parse_operator(this),
        TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft),
        TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

1154 "INHERITS": lambda self: self.expression( 1155 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1156 ), 1157 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1158 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1159 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1160 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1161 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1162 "LIKE": lambda self: self._parse_create_like(), 1163 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1164 "LOCK": lambda self: self._parse_locking(), 1165 "LOCKING": lambda self: self._parse_locking(), 1166 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1167 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1168 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1169 "MODIFIES": lambda self: self._parse_modifies_property(), 1170 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1171 "NO": lambda self: self._parse_no_property(), 1172 "ON": lambda self: self._parse_on_property(), 1173 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1174 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1175 "PARTITION": lambda self: self._parse_partitioned_of(), 1176 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1177 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1178 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1179 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1180 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1181 "READS": lambda self: self._parse_reads_property(), 1182 "REMOTE": lambda self: self._parse_remote_with_connection(), 1183 "RETURNS": lambda self: self._parse_returns(), 1184 "STRICT": lambda self: self.expression(exp.StrictProperty), 1185 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1186 "ROW": lambda self: self._parse_row(), 1187 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1188 "SAMPLE": lambda self: self.expression( 1189 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1190 ), 1191 "SECURE": lambda self: self.expression(exp.SecureProperty), 1192 "SECURITY": lambda self: self._parse_security(), 1193 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1194 "SETTINGS": lambda self: self._parse_settings_property(), 1195 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1196 "SORTKEY": lambda self: self._parse_sortkey(), 1197 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1198 "STABLE": lambda self: self.expression( 1199 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1200 ), 1201 "STORED": lambda self: self._parse_stored(), 1202 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1203 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1204 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1205 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1206 "TO": lambda self: self._parse_to_table(), 1207 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1208 "TRANSFORM": lambda self: self.expression( 1209 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 
    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If there is no parenthesis after the keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

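    # Illustrative note, not part of the original source: given the swap above,
    # a Hive-style BUCKET(5, d) and a Trino-style BUCKET(d, 5) both end up as
    # PartitionedByBucket(this=<column d>, expression=<literal 5>).
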
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "CHAR": lambda self: self._parse_char(),
        "CHR": lambda self: self._parse_char(),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "INITCAP": lambda self: self._parse_initcap(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self._parse_xml_element(),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS: t.Dict[str, t.Type[exp.Expression]] = {}

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    SET_ASSIGNMENT_DELIMITERS = {"=", ":=", "TO"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows, exp.Values)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether ALTER statements are allowed to contain PARTITION specifications
    ALTER_TABLE_PARTITIONS = False

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINs without join criteria.
    # Adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False

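    # Hedged sketch, not part of the original source: dialects usually tune the
    # parser by flipping the class-level flags above rather than overriding
    # methods. A hypothetical dialect where LOG(x) means LN(x) and string
    # aliases are legal would declare:
    #
    #   class MyDialectParser(Parser):
    #       LOG_DEFAULTS_TO_LN = True
    #       STRING_ALIASES = True
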
SELECT 1 FROM y.z AS z, z.a (Redshift) 1649 SUPPORTS_IMPLICIT_UNNEST = False 1650 1651 # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS 1652 INTERVAL_SPANS = True 1653 1654 # Whether a PARTITION clause can follow a table reference 1655 SUPPORTS_PARTITION_SELECTION = False 1656 1657 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1658 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1659 1660 # Whether the 'AS' keyword is optional in the CTE definition syntax 1661 OPTIONAL_ALIAS_TOKEN_CTE = True 1662 1663 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1664 ALTER_RENAME_REQUIRES_COLUMN = True 1665 1666 # Whether ALTER statements are allowed to contain partition specifications 1667 ALTER_TABLE_PARTITIONS = False 1668 1669 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1670 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1671 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1672 # as BigQuery, where all joins have the same precedence. 1673 JOINS_HAVE_EQUAL_PRECEDENCE = False 1674 1675 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1676 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1677 1678 # Whether map literals support arbitrary expressions as keys. 1679 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1680 # When False, keys are typically restricted to identifiers. 1681 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1682 1683 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this 1684 # is true for Snowflake but not for BigQuery, which can also process strings 1685 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1686 1687 # Dialects like Databricks support JOINs without join criteria. 1688 # Adding an ON TRUE makes transpilation semantically correct for other dialects 1689 ADD_JOIN_ON_TRUE = False 1690 1691 # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]' 1692 # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND` 1693 SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False 1694 1695 __slots__ = ( 1696 "error_level", 1697 "error_message_context", 1698 "max_errors", 1699 "dialect", 1700 "sql", 1701 "errors", 1702 "_tokens", 1703 "_index", 1704 "_curr", 1705 "_next", 1706 "_prev", 1707 "_prev_comments", 1708 "_pipe_cte_counter", 1709 ) 1710 1711 # Autofilled 1712 SHOW_TRIE: t.Dict = {} 1713 SET_TRIE: t.Dict = {} 1714 1715 def __init__( 1716 self, 1717 error_level: t.Optional[ErrorLevel] = None, 1718 error_message_context: int = 100, 1719 max_errors: int = 3, 1720 dialect: DialectType = None, 1721 ): 1722 from sqlglot.dialects import Dialect 1723 1724 self.error_level = error_level or ErrorLevel.IMMEDIATE 1725 self.error_message_context = error_message_context 1726 self.max_errors = max_errors 1727 self.dialect = Dialect.get_or_raise(dialect) 1728 self.reset() 1729 1730 def reset(self): 1731 self.sql = "" 1732 self.errors = [] 1733 self._tokens = [] 1734 self._index = 0 1735 self._curr = None 1736 self._next = None 1737 self._prev = None 1738 self._prev_comments = None 1739 self._pipe_cte_counter = 0 1740 1741 def parse( 1742 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1743 ) -> t.List[t.Optional[exp.Expression]]: 1744 """ 1745 Parses a list of tokens and returns a list of syntax trees, one tree 1746 per parsed SQL statement.
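        Example (illustrative editor's addition, not part of the original
        docstring; assumes the `Dialect.tokenize` and `Dialect.parser` helpers
        used below):

            >>> from sqlglot.dialects import Dialect
            >>> dialect = Dialect.get_or_raise("duckdb")
            >>> sql = "SELECT 1; SELECT 2"
            >>> [e.sql() for e in dialect.parser().parse(dialect.tokenize(sql), sql)]
            ['SELECT 1', 'SELECT 2']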
1747 1748 Args: 1749 raw_tokens: The list of tokens. 1750 sql: The original SQL string, used to produce helpful debug messages. 1751 1752 Returns: 1753 The list of the produced syntax trees. 1754 """ 1755 return self._parse( 1756 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1757 ) 1758 1759 def parse_into( 1760 self, 1761 expression_types: exp.IntoType, 1762 raw_tokens: t.List[Token], 1763 sql: t.Optional[str] = None, 1764 ) -> t.List[t.Optional[exp.Expression]]: 1765 """ 1766 Parses a list of tokens into a given Expression type. If a collection of Expression 1767 types is given instead, this method will try to parse the token list into each one 1768 of them, stopping at the first for which the parsing succeeds. 1769 1770 Args: 1771 expression_types: The expression type(s) to try and parse the token list into. 1772 raw_tokens: The list of tokens. 1773 sql: The original SQL string, used to produce helpful debug messages. 1774 1775 Returns: 1776 The target Expression. 1777 """ 1778 errors = [] 1779 for expression_type in ensure_list(expression_types): 1780 parser = self.EXPRESSION_PARSERS.get(expression_type) 1781 if not parser: 1782 raise TypeError(f"No parser registered for {expression_type}") 1783 1784 try: 1785 return self._parse(parser, raw_tokens, sql) 1786 except ParseError as e: 1787 e.errors[0]["into_expression"] = expression_type 1788 errors.append(e) 1789 1790 raise ParseError( 1791 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1792 errors=merge_errors(errors), 1793 ) from errors[-1] 1794 1795 def _parse( 1796 self, 1797 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1798 raw_tokens: t.List[Token], 1799 sql: t.Optional[str] = None, 1800 ) -> t.List[t.Optional[exp.Expression]]: 1801 self.reset() 1802 self.sql = sql or "" 1803 1804 total = len(raw_tokens) 1805 chunks: t.List[t.List[Token]] = [[]] 1806 1807 for i, token in enumerate(raw_tokens): 1808 if token.token_type == TokenType.SEMICOLON: 1809 if token.comments: 1810 chunks.append([token]) 1811 1812 if i < total - 1: 1813 chunks.append([]) 1814 else: 1815 chunks[-1].append(token) 1816 1817 expressions = [] 1818 1819 for tokens in chunks: 1820 self._index = -1 1821 self._tokens = tokens 1822 self._advance() 1823 1824 expressions.append(parse_method(self)) 1825 1826 if self._index < len(self._tokens): 1827 self.raise_error("Invalid expression / Unexpected token") 1828 1829 self.check_errors() 1830 1831 return expressions 1832 1833 def check_errors(self) -> None: 1834 """Logs or raises any found errors, depending on the chosen error level setting.""" 1835 if self.error_level == ErrorLevel.WARN: 1836 for error in self.errors: 1837 logger.error(str(error)) 1838 elif self.error_level == ErrorLevel.RAISE and self.errors: 1839 raise ParseError( 1840 concat_messages(self.errors, self.max_errors), 1841 errors=merge_errors(self.errors), 1842 ) 1843 1844 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1845 """ 1846 Appends an error in the list of recorded errors or raises it, depending on the chosen 1847 error level setting. 1848 """ 1849 token = token or self._curr or self._prev or Token.string("") 1850 formatted_sql, start_context, highlight, end_context = highlight_sql( 1851 sql=self.sql, 1852 positions=[(token.start, token.end)], 1853 context_length=self.error_message_context, 1854 ) 1855 formatted_message = f"{message}. 
Line {token.line}, Col: {token.col}.\n {formatted_sql}" 1856 1857 error = ParseError.new( 1858 formatted_message, 1859 description=message, 1860 line=token.line, 1861 col=token.col, 1862 start_context=start_context, 1863 highlight=highlight, 1864 end_context=end_context, 1865 ) 1866 1867 if self.error_level == ErrorLevel.IMMEDIATE: 1868 raise error 1869 1870 self.errors.append(error) 1871 1872 def expression( 1873 self, 1874 exp_class: t.Type[E], 1875 token: t.Optional[Token] = None, 1876 comments: t.Optional[t.List[str]] = None, 1877 **kwargs, 1878 ) -> E: 1879 """ 1880 Creates a new, validated Expression. 1881 1882 Args: 1883 exp_class: The expression class to instantiate. token: An optional token; when provided, its text becomes the expression's `this` and its position metadata is copied onto the expression. 1884 comments: An optional list of comments to attach to the expression. 1885 kwargs: The arguments to set for the expression along with their respective values. 1886 1887 Returns: 1888 The target expression. 1889 """ 1890 if token: 1891 instance = exp_class(this=token.text, **kwargs) 1892 instance.update_positions(token) 1893 else: 1894 instance = exp_class(**kwargs) 1895 instance.add_comments(comments) if comments else self._add_comments(instance) 1896 return self.validate_expression(instance) 1897 1898 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1899 if expression and self._prev_comments: 1900 expression.add_comments(self._prev_comments) 1901 self._prev_comments = None 1902 1903 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1904 """ 1905 Validates an Expression, making sure that all its mandatory arguments are set. 1906 1907 Args: 1908 expression: The expression to validate. 1909 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1910 1911 Returns: 1912 The validated expression. 1913 """ 1914 if self.error_level != ErrorLevel.IGNORE: 1915 for error_message in expression.error_messages(args): 1916 self.raise_error(error_message) 1917 1918 return expression 1919 1920 def _find_sql(self, start: Token, end: Token) -> str: 1921 return self.sql[start.start : end.end + 1] 1922 1923 def _is_connected(self) -> bool: 1924 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1925 1926 def _advance(self, times: int = 1) -> None: 1927 self._index += times 1928 self._curr = seq_get(self._tokens, self._index) 1929 self._next = seq_get(self._tokens, self._index + 1) 1930 1931 if self._index > 0: 1932 self._prev = self._tokens[self._index - 1] 1933 self._prev_comments = self._prev.comments 1934 else: 1935 self._prev = None 1936 self._prev_comments = None 1937 1938 def _retreat(self, index: int) -> None: 1939 if index != self._index: 1940 self._advance(index - self._index) 1941 1942 def _warn_unsupported(self) -> None: 1943 if len(self._tokens) <= 1: 1944 return 1945 1946 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1947 # interested in emitting a warning for the one being currently processed. 1948 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1949 1950 logger.warning( 1951 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
1952 ) 1953 1954 def _parse_command(self) -> exp.Command: 1955 self._warn_unsupported() 1956 return self.expression( 1957 exp.Command, 1958 comments=self._prev_comments, 1959 this=self._prev.text.upper(), 1960 expression=self._parse_string(), 1961 ) 1962 1963 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1964 """ 1965 Attempts to backtrack if a parse function that contains a try/except internally raises an error. 1966 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1967 solve this by setting & resetting the parser state accordingly. 1968 """ 1969 index = self._index 1970 error_level = self.error_level 1971 1972 self.error_level = ErrorLevel.IMMEDIATE 1973 try: 1974 this = parse_method() 1975 except ParseError: 1976 this = None 1977 finally: 1978 if not this or retreat: 1979 self._retreat(index) 1980 self.error_level = error_level 1981 1982 return this 1983 1984 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1985 start = self._prev 1986 exists = self._parse_exists() if allow_exists else None 1987 1988 self._match(TokenType.ON) 1989 1990 materialized = self._match_text_seq("MATERIALIZED") 1991 kind = self._match_set(self.CREATABLES) and self._prev 1992 if not kind: 1993 return self._parse_as_command(start) 1994 1995 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1996 this = self._parse_user_defined_function(kind=kind.token_type) 1997 elif kind.token_type == TokenType.TABLE: 1998 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1999 elif kind.token_type == TokenType.COLUMN: 2000 this = self._parse_column() 2001 else: 2002 this = self._parse_id_var() 2003 2004 self._match(TokenType.IS) 2005 2006 return self.expression( 2007 exp.Comment, 2008 this=this, 2009 kind=kind.text, 2010 expression=self._parse_string(), 2011 exists=exists, 2012 materialized=materialized, 2013 ) 2014 2015 def _parse_to_table( 2016 self, 2017 ) -> exp.ToTableProperty: 2018 table = self._parse_table_parts(schema=True) 2019 return self.expression(exp.ToTableProperty, this=table) 2020 2021 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 2022 def _parse_ttl(self) -> exp.Expression: 2023 def _parse_ttl_action() -> t.Optional[exp.Expression]: 2024 this = self._parse_bitwise() 2025 2026 if self._match_text_seq("DELETE"): 2027 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 2028 if self._match_text_seq("RECOMPRESS"): 2029 return self.expression( 2030 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 2031 ) 2032 if self._match_text_seq("TO", "DISK"): 2033 return self.expression( 2034 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 2035 ) 2036 if self._match_text_seq("TO", "VOLUME"): 2037 return self.expression( 2038 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 2039 ) 2040 2041 return this 2042 2043 expressions = self._parse_csv(_parse_ttl_action) 2044 where = self._parse_where() 2045 group = self._parse_group() 2046 2047 aggregates = None 2048 if group and self._match(TokenType.SET): 2049 aggregates = self._parse_csv(self._parse_set_item) 2050 2051 return self.expression( 2052 exp.MergeTreeTTL, 2053 expressions=expressions, 2054 where=where, 2055 group=group, 2056 aggregates=aggregates, 2057 ) 2058 2059 def _parse_statement(self) -> t.Optional[exp.Expression]: 2060 if self._curr is None: 2061 return None 2062 2063 if
self._match_set(self.STATEMENT_PARSERS): 2064 comments = self._prev_comments 2065 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 2066 stmt.add_comments(comments, prepend=True) 2067 return stmt 2068 2069 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 2070 return self._parse_command() 2071 2072 expression = self._parse_expression() 2073 expression = self._parse_set_operations(expression) if expression else self._parse_select() 2074 return self._parse_query_modifiers(expression) 2075 2076 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 2077 start = self._prev 2078 temporary = self._match(TokenType.TEMPORARY) 2079 materialized = self._match_text_seq("MATERIALIZED") 2080 2081 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 2082 if not kind: 2083 return self._parse_as_command(start) 2084 2085 concurrently = self._match_text_seq("CONCURRENTLY") 2086 if_exists = exists or self._parse_exists() 2087 2088 if kind == "COLUMN": 2089 this = self._parse_column() 2090 else: 2091 this = self._parse_table_parts( 2092 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 2093 ) 2094 2095 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 2096 2097 if self._match(TokenType.L_PAREN, advance=False): 2098 expressions = self._parse_wrapped_csv(self._parse_types) 2099 else: 2100 expressions = None 2101 2102 return self.expression( 2103 exp.Drop, 2104 exists=if_exists, 2105 this=this, 2106 expressions=expressions, 2107 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 2108 temporary=temporary, 2109 materialized=materialized, 2110 cascade=self._match_text_seq("CASCADE"), 2111 constraints=self._match_text_seq("CONSTRAINTS"), 2112 purge=self._match_text_seq("PURGE"), 2113 cluster=cluster, 2114 concurrently=concurrently, 2115 ) 2116 2117 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 2118 return ( 2119 self._match_text_seq("IF") 2120 and (not not_ or self._match(TokenType.NOT)) 2121 and self._match(TokenType.EXISTS) 2122 ) 2123 2124 def _parse_create(self) -> exp.Create | exp.Command: 2125 # Note: this can't be None because we've matched a statement parser 2126 start = self._prev 2127 2128 replace = ( 2129 start.token_type == TokenType.REPLACE 2130 or self._match_pair(TokenType.OR, TokenType.REPLACE) 2131 or self._match_pair(TokenType.OR, TokenType.ALTER) 2132 ) 2133 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 2134 2135 unique = self._match(TokenType.UNIQUE) 2136 2137 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2138 clustered = True 2139 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2140 "COLUMNSTORE" 2141 ): 2142 clustered = False 2143 else: 2144 clustered = None 2145 2146 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2147 self._advance() 2148 2149 properties = None 2150 create_token = self._match_set(self.CREATABLES) and self._prev 2151 2152 if not create_token: 2153 # exp.Properties.Location.POST_CREATE 2154 properties = self._parse_properties() 2155 create_token = self._match_set(self.CREATABLES) and self._prev 2156 2157 if not properties or not create_token: 2158 return self._parse_as_command(start) 2159 2160 concurrently = self._match_text_seq("CONCURRENTLY") 2161 exists = self._parse_exists(not_=True) 2162 this = None 2163 expression: t.Optional[exp.Expression] = None 2164 indexes = None 2165 no_schema_binding = None 2166 begin = None 2167 end = None 2168 clone = None 2169 2170 def 
extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2171 nonlocal properties 2172 if properties and temp_props: 2173 properties.expressions.extend(temp_props.expressions) 2174 elif temp_props: 2175 properties = temp_props 2176 2177 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2178 this = self._parse_user_defined_function(kind=create_token.token_type) 2179 2180 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2181 extend_props(self._parse_properties()) 2182 2183 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2184 extend_props(self._parse_properties()) 2185 2186 if not expression: 2187 if self._match(TokenType.COMMAND): 2188 expression = self._parse_as_command(self._prev) 2189 else: 2190 begin = self._match(TokenType.BEGIN) 2191 return_ = self._match_text_seq("RETURN") 2192 2193 if self._match(TokenType.STRING, advance=False): 2194 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2195 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2196 expression = self._parse_string() 2197 extend_props(self._parse_properties()) 2198 else: 2199 expression = self._parse_user_defined_function_expression() 2200 2201 end = self._match_text_seq("END") 2202 2203 if return_: 2204 expression = self.expression(exp.Return, this=expression) 2205 elif create_token.token_type == TokenType.INDEX: 2206 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2207 if not self._match(TokenType.ON): 2208 index = self._parse_id_var() 2209 anonymous = False 2210 else: 2211 index = None 2212 anonymous = True 2213 2214 this = self._parse_index(index=index, anonymous=anonymous) 2215 elif create_token.token_type in self.DB_CREATABLES: 2216 table_parts = self._parse_table_parts( 2217 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2218 ) 2219 2220 # exp.Properties.Location.POST_NAME 2221 self._match(TokenType.COMMA) 2222 extend_props(self._parse_properties(before=True)) 2223 2224 this = self._parse_schema(this=table_parts) 2225 2226 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2227 extend_props(self._parse_properties()) 2228 2229 has_alias = self._match(TokenType.ALIAS) 2230 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2231 # exp.Properties.Location.POST_ALIAS 2232 extend_props(self._parse_properties()) 2233 2234 if create_token.token_type == TokenType.SEQUENCE: 2235 expression = self._parse_types() 2236 props = self._parse_properties() 2237 if props: 2238 sequence_props = exp.SequenceProperties() 2239 options = [] 2240 for prop in props: 2241 if isinstance(prop, exp.SequenceProperties): 2242 for arg, value in prop.args.items(): 2243 if arg == "options": 2244 options.extend(value) 2245 else: 2246 sequence_props.set(arg, value) 2247 prop.pop() 2248 2249 if options: 2250 sequence_props.set("options", options) 2251 2252 props.append("expressions", sequence_props) 2253 extend_props(props) 2254 else: 2255 expression = self._parse_ddl_select() 2256 2257 # Some dialects also support using a table as an alias instead of a SELECT. 2258 # Here we fall back to this as an alternative.
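# (Editor's note, illustrative: e.g. a Teradata-style `CREATE TABLE t2 AS t1 WITH DATA`,
# where the token sequence after AS names an existing table rather than starting a SELECT.)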
2259 if not expression and has_alias: 2260 expression = self._try_parse(self._parse_table_parts) 2261 2262 if create_token.token_type == TokenType.TABLE: 2263 # exp.Properties.Location.POST_EXPRESSION 2264 extend_props(self._parse_properties()) 2265 2266 indexes = [] 2267 while True: 2268 index = self._parse_index() 2269 2270 # exp.Properties.Location.POST_INDEX 2271 extend_props(self._parse_properties()) 2272 if not index: 2273 break 2274 else: 2275 self._match(TokenType.COMMA) 2276 indexes.append(index) 2277 elif create_token.token_type == TokenType.VIEW: 2278 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2279 no_schema_binding = True 2280 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2281 extend_props(self._parse_properties()) 2282 2283 shallow = self._match_text_seq("SHALLOW") 2284 2285 if self._match_texts(self.CLONE_KEYWORDS): 2286 copy = self._prev.text.lower() == "copy" 2287 clone = self.expression( 2288 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2289 ) 2290 2291 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2292 return self._parse_as_command(start) 2293 2294 create_kind_text = create_token.text.upper() 2295 return self.expression( 2296 exp.Create, 2297 this=this, 2298 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2299 replace=replace, 2300 refresh=refresh, 2301 unique=unique, 2302 expression=expression, 2303 exists=exists, 2304 properties=properties, 2305 indexes=indexes, 2306 no_schema_binding=no_schema_binding, 2307 begin=begin, 2308 end=end, 2309 clone=clone, 2310 concurrently=concurrently, 2311 clustered=clustered, 2312 ) 2313 2314 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2315 seq = exp.SequenceProperties() 2316 2317 options = [] 2318 index = self._index 2319 2320 while self._curr: 2321 self._match(TokenType.COMMA) 2322 if self._match_text_seq("INCREMENT"): 2323 self._match_text_seq("BY") 2324 self._match_text_seq("=") 2325 seq.set("increment", self._parse_term()) 2326 elif self._match_text_seq("MINVALUE"): 2327 seq.set("minvalue", self._parse_term()) 2328 elif self._match_text_seq("MAXVALUE"): 2329 seq.set("maxvalue", self._parse_term()) 2330 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2331 self._match_text_seq("=") 2332 seq.set("start", self._parse_term()) 2333 elif self._match_text_seq("CACHE"): 2334 # T-SQL allows empty CACHE which is initialized dynamically 2335 seq.set("cache", self._parse_number() or True) 2336 elif self._match_text_seq("OWNED", "BY"): 2337 # "OWNED BY NONE" is the default 2338 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2339 else: 2340 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2341 if opt: 2342 options.append(opt) 2343 else: 2344 break 2345 2346 seq.set("options", options if options else None) 2347 return None if self._index == index else seq 2348 2349 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2350 # only used for teradata currently 2351 self._match(TokenType.COMMA) 2352 2353 kwargs = { 2354 "no": self._match_text_seq("NO"), 2355 "dual": self._match_text_seq("DUAL"), 2356 "before": self._match_text_seq("BEFORE"), 2357 "default": self._match_text_seq("DEFAULT"), 2358 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2359 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2360 "after": self._match_text_seq("AFTER"), 2361 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2362 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2363 } 2364 2365 if self._match_texts(self.PROPERTY_PARSERS): 2366 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2367 try: 2368 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2369 except TypeError: 2370 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2371 2372 return None 2373 2374 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2375 return self._parse_wrapped_csv(self._parse_property) 2376 2377 def _parse_property(self) -> t.Optional[exp.Expression]: 2378 if self._match_texts(self.PROPERTY_PARSERS): 2379 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2380 2381 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2382 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2383 2384 if self._match_text_seq("COMPOUND", "SORTKEY"): 2385 return self._parse_sortkey(compound=True) 2386 2387 if self._match_text_seq("SQL", "SECURITY"): 2388 return self.expression( 2389 exp.SqlSecurityProperty, 2390 this=self._match_texts(("DEFINER", "INVOKER")) and self._prev.text.upper(), 2391 ) 2392 2393 index = self._index 2394 2395 seq_props = self._parse_sequence_properties() 2396 if seq_props: 2397 return seq_props 2398 2399 self._retreat(index) 2400 key = self._parse_column() 2401 2402 if not self._match(TokenType.EQ): 2403 self._retreat(index) 2404 return None 2405 2406 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2407 if isinstance(key, exp.Column): 2408 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2409 2410 value = self._parse_bitwise() or self._parse_var(any_token=True) 2411 2412 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2413 if isinstance(value, exp.Column): 2414 value = exp.var(value.name) 2415 2416 return self.expression(exp.Property, this=key, value=value) 2417 2418 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2419 if self._match_text_seq("BY"): 2420 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2421 2422 self._match(TokenType.ALIAS) 2423 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2424 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2425 2426 return self.expression( 2427 exp.FileFormatProperty, 2428 this=( 2429 self.expression( 2430 exp.InputOutputFormat, 2431 input_format=input_format, 2432 output_format=output_format, 2433 ) 2434 if input_format or output_format 2435 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2436 ), 2437 hive_format=True, 2438 ) 2439 2440 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2441 field = self._parse_field() 2442 if isinstance(field, exp.Identifier) and not field.quoted: 2443 field = exp.var(field) 2444 2445 return field 2446 2447 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2448 self._match(TokenType.EQ) 2449 self._match(TokenType.ALIAS) 2450 2451 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2452 2453 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2454 properties = [] 2455 while True: 2456 if before: 2457 prop = self._parse_property_before() 2458 else: 2459 prop = self._parse_property() 2460 if not prop: 2461 break 2462 for 
p in ensure_list(prop): 2463 properties.append(p) 2464 2465 if properties: 2466 return self.expression(exp.Properties, expressions=properties) 2467 2468 return None 2469 2470 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2471 return self.expression( 2472 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2473 ) 2474 2475 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2476 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2477 security_specifier = self._prev.text.upper() 2478 return self.expression(exp.SecurityProperty, this=security_specifier) 2479 return None 2480 2481 def _parse_settings_property(self) -> exp.SettingsProperty: 2482 return self.expression( 2483 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2484 ) 2485 2486 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2487 if self._index >= 2: 2488 pre_volatile_token = self._tokens[self._index - 2] 2489 else: 2490 pre_volatile_token = None 2491 2492 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2493 return exp.VolatileProperty() 2494 2495 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2496 2497 def _parse_retention_period(self) -> exp.Var: 2498 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2499 number = self._parse_number() 2500 number_str = f"{number} " if number else "" 2501 unit = self._parse_var(any_token=True) 2502 return exp.var(f"{number_str}{unit}") 2503 2504 def _parse_system_versioning_property( 2505 self, with_: bool = False 2506 ) -> exp.WithSystemVersioningProperty: 2507 self._match(TokenType.EQ) 2508 prop = self.expression( 2509 exp.WithSystemVersioningProperty, 2510 on=True, 2511 with_=with_, 2512 ) 2513 2514 if self._match_text_seq("OFF"): 2515 prop.set("on", False) 2516 return prop 2517 2518 self._match(TokenType.ON) 2519 if self._match(TokenType.L_PAREN): 2520 while self._curr and not self._match(TokenType.R_PAREN): 2521 if self._match_text_seq("HISTORY_TABLE", "="): 2522 prop.set("this", self._parse_table_parts()) 2523 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2524 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2525 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2526 prop.set("retention_period", self._parse_retention_period()) 2527 2528 self._match(TokenType.COMMA) 2529 2530 return prop 2531 2532 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2533 self._match(TokenType.EQ) 2534 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2535 prop = self.expression(exp.DataDeletionProperty, on=on) 2536 2537 if self._match(TokenType.L_PAREN): 2538 while self._curr and not self._match(TokenType.R_PAREN): 2539 if self._match_text_seq("FILTER_COLUMN", "="): 2540 prop.set("filter_column", self._parse_column()) 2541 elif self._match_text_seq("RETENTION_PERIOD", "="): 2542 prop.set("retention_period", self._parse_retention_period()) 2543 2544 self._match(TokenType.COMMA) 2545 2546 return prop 2547 2548 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2549 kind = "HASH" 2550 expressions: t.Optional[t.List[exp.Expression]] = None 2551 if self._match_text_seq("BY", "HASH"): 2552 expressions = self._parse_wrapped_csv(self._parse_id_var) 2553 elif self._match_text_seq("BY", "RANDOM"): 2554 kind = "RANDOM" 2555 2556 # If the BUCKETS keyword is not present, the number of buckets is 
AUTO 2557 buckets: t.Optional[exp.Expression] = None 2558 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2559 buckets = self._parse_number() 2560 2561 return self.expression( 2562 exp.DistributedByProperty, 2563 expressions=expressions, 2564 kind=kind, 2565 buckets=buckets, 2566 order=self._parse_order(), 2567 ) 2568 2569 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2570 self._match_text_seq("KEY") 2571 expressions = self._parse_wrapped_id_vars() 2572 return self.expression(expr_type, expressions=expressions) 2573 2574 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2575 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2576 prop = self._parse_system_versioning_property(with_=True) 2577 self._match_r_paren() 2578 return prop 2579 2580 if self._match(TokenType.L_PAREN, advance=False): 2581 return self._parse_wrapped_properties() 2582 2583 if self._match_text_seq("JOURNAL"): 2584 return self._parse_withjournaltable() 2585 2586 if self._match_texts(self.VIEW_ATTRIBUTES): 2587 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2588 2589 if self._match_text_seq("DATA"): 2590 return self._parse_withdata(no=False) 2591 elif self._match_text_seq("NO", "DATA"): 2592 return self._parse_withdata(no=True) 2593 2594 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2595 return self._parse_serde_properties(with_=True) 2596 2597 if self._match(TokenType.SCHEMA): 2598 return self.expression( 2599 exp.WithSchemaBindingProperty, 2600 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2601 ) 2602 2603 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2604 return self.expression( 2605 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2606 ) 2607 2608 if not self._next: 2609 return None 2610 2611 return self._parse_withisolatedloading() 2612 2613 def _parse_procedure_option(self) -> exp.Expression | None: 2614 if self._match_text_seq("EXECUTE", "AS"): 2615 return self.expression( 2616 exp.ExecuteAsProperty, 2617 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2618 or self._parse_string(), 2619 ) 2620 2621 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2622 2623 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2624 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2625 self._match(TokenType.EQ) 2626 2627 user = self._parse_id_var() 2628 self._match(TokenType.PARAMETER) 2629 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2630 2631 if not user or not host: 2632 return None 2633 2634 return exp.DefinerProperty(this=f"{user}@{host}") 2635 2636 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2637 self._match(TokenType.TABLE) 2638 self._match(TokenType.EQ) 2639 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2640 2641 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2642 return self.expression(exp.LogProperty, no=no) 2643 2644 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2645 return self.expression(exp.JournalProperty, **kwargs) 2646 2647 def _parse_checksum(self) -> exp.ChecksumProperty: 2648 self._match(TokenType.EQ) 2649 2650 on = None 2651 if self._match(TokenType.ON): 2652 on = True 2653 elif self._match_text_seq("OFF"): 2654 on = False 2655 2656 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2657 2658 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2659 return self.expression( 2660 exp.Cluster, 2661 expressions=( 2662 self._parse_wrapped_csv(self._parse_ordered) 2663 if wrapped 2664 else self._parse_csv(self._parse_ordered) 2665 ), 2666 ) 2667 2668 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2669 self._match_text_seq("BY") 2670 2671 self._match_l_paren() 2672 expressions = self._parse_csv(self._parse_column) 2673 self._match_r_paren() 2674 2675 if self._match_text_seq("SORTED", "BY"): 2676 self._match_l_paren() 2677 sorted_by = self._parse_csv(self._parse_ordered) 2678 self._match_r_paren() 2679 else: 2680 sorted_by = None 2681 2682 self._match(TokenType.INTO) 2683 buckets = self._parse_number() 2684 self._match_text_seq("BUCKETS") 2685 2686 return self.expression( 2687 exp.ClusteredByProperty, 2688 expressions=expressions, 2689 sorted_by=sorted_by, 2690 buckets=buckets, 2691 ) 2692 2693 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2694 if not self._match_text_seq("GRANTS"): 2695 self._retreat(self._index - 1) 2696 return None 2697 2698 return self.expression(exp.CopyGrantsProperty) 2699 2700 def _parse_freespace(self) -> exp.FreespaceProperty: 2701 self._match(TokenType.EQ) 2702 return self.expression( 2703 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2704 ) 2705 2706 def _parse_mergeblockratio( 2707 self, no: bool = False, default: bool = False 2708 ) -> exp.MergeBlockRatioProperty: 2709 if self._match(TokenType.EQ): 2710 return self.expression( 2711 exp.MergeBlockRatioProperty, 2712 this=self._parse_number(), 2713 percent=self._match(TokenType.PERCENT), 2714 ) 2715 2716 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2717 2718 def _parse_datablocksize( 2719 self, 2720 default: t.Optional[bool] = None, 2721 minimum: t.Optional[bool] = None, 2722 maximum: t.Optional[bool] = None, 2723 ) -> exp.DataBlocksizeProperty: 2724 self._match(TokenType.EQ) 2725 size = self._parse_number() 2726 2727 units = None 2728 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2729 units = self._prev.text 2730 2731 return self.expression( 2732 exp.DataBlocksizeProperty, 2733 size=size, 2734 units=units, 2735 default=default, 2736 minimum=minimum, 2737 maximum=maximum, 2738 ) 2739 2740 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2741 self._match(TokenType.EQ) 2742 always = self._match_text_seq("ALWAYS") 2743 manual = self._match_text_seq("MANUAL") 2744 never = self._match_text_seq("NEVER") 2745 default = self._match_text_seq("DEFAULT") 2746 2747 autotemp = None 2748 if self._match_text_seq("AUTOTEMP"): 2749 autotemp = self._parse_schema() 2750 2751 return self.expression( 2752 exp.BlockCompressionProperty, 2753 always=always, 2754 manual=manual, 2755 never=never, 2756 default=default, 2757 autotemp=autotemp, 2758 ) 2759 2760 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2761 index = self._index 2762 no = self._match_text_seq("NO") 2763 concurrent = self._match_text_seq("CONCURRENT") 2764 2765 if not self._match_text_seq("ISOLATED", "LOADING"): 2766 self._retreat(index) 2767 return None 2768 2769 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2770 return self.expression( 2771 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2772 ) 2773 2774 def _parse_locking(self) -> exp.LockingProperty: 2775 if self._match(TokenType.TABLE): 2776 kind = "TABLE" 2777 elif 
self._match(TokenType.VIEW): 2778 kind = "VIEW" 2779 elif self._match(TokenType.ROW): 2780 kind = "ROW" 2781 elif self._match_text_seq("DATABASE"): 2782 kind = "DATABASE" 2783 else: 2784 kind = None 2785 2786 if kind in ("DATABASE", "TABLE", "VIEW"): 2787 this = self._parse_table_parts() 2788 else: 2789 this = None 2790 2791 if self._match(TokenType.FOR): 2792 for_or_in = "FOR" 2793 elif self._match(TokenType.IN): 2794 for_or_in = "IN" 2795 else: 2796 for_or_in = None 2797 2798 if self._match_text_seq("ACCESS"): 2799 lock_type = "ACCESS" 2800 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2801 lock_type = "EXCLUSIVE" 2802 elif self._match_text_seq("SHARE"): 2803 lock_type = "SHARE" 2804 elif self._match_text_seq("READ"): 2805 lock_type = "READ" 2806 elif self._match_text_seq("WRITE"): 2807 lock_type = "WRITE" 2808 elif self._match_text_seq("CHECKSUM"): 2809 lock_type = "CHECKSUM" 2810 else: 2811 lock_type = None 2812 2813 override = self._match_text_seq("OVERRIDE") 2814 2815 return self.expression( 2816 exp.LockingProperty, 2817 this=this, 2818 kind=kind, 2819 for_or_in=for_or_in, 2820 lock_type=lock_type, 2821 override=override, 2822 ) 2823 2824 def _parse_partition_by(self) -> t.List[exp.Expression]: 2825 if self._match(TokenType.PARTITION_BY): 2826 return self._parse_csv(self._parse_disjunction) 2827 return [] 2828 2829 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2830 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2831 if self._match_text_seq("MINVALUE"): 2832 return exp.var("MINVALUE") 2833 if self._match_text_seq("MAXVALUE"): 2834 return exp.var("MAXVALUE") 2835 return self._parse_bitwise() 2836 2837 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2838 expression = None 2839 from_expressions = None 2840 to_expressions = None 2841 2842 if self._match(TokenType.IN): 2843 this = self._parse_wrapped_csv(self._parse_bitwise) 2844 elif self._match(TokenType.FROM): 2845 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2846 self._match_text_seq("TO") 2847 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2848 elif self._match_text_seq("WITH", "(", "MODULUS"): 2849 this = self._parse_number() 2850 self._match_text_seq(",", "REMAINDER") 2851 expression = self._parse_number() 2852 self._match_r_paren() 2853 else: 2854 self.raise_error("Failed to parse partition bound spec.") 2855 2856 return self.expression( 2857 exp.PartitionBoundSpec, 2858 this=this, 2859 expression=expression, 2860 from_expressions=from_expressions, 2861 to_expressions=to_expressions, 2862 ) 2863 2864 # https://www.postgresql.org/docs/current/sql-createtable.html 2865 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2866 if not self._match_text_seq("OF"): 2867 self._retreat(self._index - 1) 2868 return None 2869 2870 this = self._parse_table(schema=True) 2871 2872 if self._match(TokenType.DEFAULT): 2873 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2874 elif self._match_text_seq("FOR", "VALUES"): 2875 expression = self._parse_partition_bound_spec() 2876 else: 2877 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2878 2879 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2880 2881 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2882 self._match(TokenType.EQ) 2883 return self.expression( 2884 exp.PartitionedByProperty, 2885 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2886 ) 2887 2888 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2889 if self._match_text_seq("AND", "STATISTICS"): 2890 statistics = True 2891 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2892 statistics = False 2893 else: 2894 statistics = None 2895 2896 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2897 2898 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2899 if self._match_text_seq("SQL"): 2900 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2901 return None 2902 2903 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2904 if self._match_text_seq("SQL", "DATA"): 2905 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2906 return None 2907 2908 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2909 if self._match_text_seq("PRIMARY", "INDEX"): 2910 return exp.NoPrimaryIndexProperty() 2911 if self._match_text_seq("SQL"): 2912 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2913 return None 2914 2915 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2916 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2917 return exp.OnCommitProperty() 2918 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2919 return exp.OnCommitProperty(delete=True) 2920 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2921 2922 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2923 if self._match_text_seq("SQL", "DATA"): 2924 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2925 return None 2926 2927 def _parse_distkey(self) -> exp.DistKeyProperty: 2928 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2929 2930 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2931 table = self._parse_table(schema=True) 2932 2933 options = [] 2934 while self._match_texts(("INCLUDING", "EXCLUDING")): 2935 this = self._prev.text.upper() 2936 2937 id_var = self._parse_id_var() 2938 if not id_var: 2939 return None 2940 2941 options.append( 2942 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2943 ) 2944 2945 return self.expression(exp.LikeProperty, this=table, expressions=options) 2946 2947 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2948 return self.expression( 2949 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2950 ) 2951 2952 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2953 self._match(TokenType.EQ) 2954 return self.expression( 2955 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2956 ) 2957 2958 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2959 self._match_text_seq("WITH", "CONNECTION") 2960 return self.expression( 2961 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2962 ) 2963 2964 def _parse_returns(self) -> exp.ReturnsProperty: 2965 value: t.Optional[exp.Expression] 2966 null = None 2967 is_table = self._match(TokenType.TABLE) 2968 2969 if is_table: 2970 if self._match(TokenType.LT): 2971 value = self.expression( 2972 exp.Schema, 2973 this="TABLE", 2974 expressions=self._parse_csv(self._parse_struct_types), 2975 ) 2976 if not self._match(TokenType.GT): 2977 self.raise_error("Expecting >") 2978 else: 2979 value = self._parse_schema(exp.var("TABLE")) 2980 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
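# Editor's note (illustrative): this branch consumes the tail of the standard
# function characteristic `RETURNS NULL ON NULL INPUT`; the leading RETURNS
# keyword is expected to have been matched before this property parser runs.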
2981 null = True 2982 value = None 2983 else: 2984 value = self._parse_types() 2985 2986 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2987 2988 def _parse_describe(self) -> exp.Describe: 2989 kind = self._match_set(self.CREATABLES) and self._prev.text 2990 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2991 if self._match(TokenType.DOT): 2992 style = None 2993 self._retreat(self._index - 2) 2994 2995 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2996 2997 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2998 this = self._parse_statement() 2999 else: 3000 this = self._parse_table(schema=True) 3001 3002 properties = self._parse_properties() 3003 expressions = properties.expressions if properties else None 3004 partition = self._parse_partition() 3005 return self.expression( 3006 exp.Describe, 3007 this=this, 3008 style=style, 3009 kind=kind, 3010 expressions=expressions, 3011 partition=partition, 3012 format=format, 3013 as_json=self._match_text_seq("AS", "JSON"), 3014 ) 3015 3016 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 3017 kind = self._prev.text.upper() 3018 expressions = [] 3019 3020 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 3021 if self._match(TokenType.WHEN): 3022 expression = self._parse_disjunction() 3023 self._match(TokenType.THEN) 3024 else: 3025 expression = None 3026 3027 else_ = self._match(TokenType.ELSE) 3028 3029 if not self._match(TokenType.INTO): 3030 return None 3031 3032 return self.expression( 3033 exp.ConditionalInsert, 3034 this=self.expression( 3035 exp.Insert, 3036 this=self._parse_table(schema=True), 3037 expression=self._parse_derived_table_values(), 3038 ), 3039 expression=expression, 3040 else_=else_, 3041 ) 3042 3043 expression = parse_conditional_insert() 3044 while expression is not None: 3045 expressions.append(expression) 3046 expression = parse_conditional_insert() 3047 3048 return self.expression( 3049 exp.MultitableInserts, 3050 kind=kind, 3051 comments=comments, 3052 expressions=expressions, 3053 source=self._parse_table(), 3054 ) 3055 3056 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 3057 comments = [] 3058 hint = self._parse_hint() 3059 overwrite = self._match(TokenType.OVERWRITE) 3060 ignore = self._match(TokenType.IGNORE) 3061 local = self._match_text_seq("LOCAL") 3062 alternative = None 3063 is_function = None 3064 3065 if self._match_text_seq("DIRECTORY"): 3066 this: t.Optional[exp.Expression] = self.expression( 3067 exp.Directory, 3068 this=self._parse_var_or_string(), 3069 local=local, 3070 row_format=self._parse_row_format(match_row=True), 3071 ) 3072 else: 3073 if self._match_set((TokenType.FIRST, TokenType.ALL)): 3074 comments += ensure_list(self._prev_comments) 3075 return self._parse_multitable_inserts(comments) 3076 3077 if self._match(TokenType.OR): 3078 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 3079 3080 self._match(TokenType.INTO) 3081 comments += ensure_list(self._prev_comments) 3082 self._match(TokenType.TABLE) 3083 is_function = self._match(TokenType.FUNCTION) 3084 3085 this = self._parse_function() if is_function else self._parse_insert_table() 3086 3087 returning = self._parse_returning() # TSQL allows RETURNING before source 3088 3089 return self.expression( 3090 exp.Insert, 3091 comments=comments, 3092 hint=hint, 3093 is_function=is_function, 3094 this=this, 3095 
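# Editor's note (illustrative): the keyword arguments below rely on Python's
# short-circuit evaluation: each `self._match*(...) and self._parse_*()` pair
# consumes its optional clause only when the leading keyword is present, and
# otherwise leaves a falsy value for that argument.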
stored=self._match_text_seq("STORED") and self._parse_stored(), 3096 by_name=self._match_text_seq("BY", "NAME"), 3097 exists=self._parse_exists(), 3098 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 3099 and self._parse_disjunction(), 3100 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 3101 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 3102 default=self._match_text_seq("DEFAULT", "VALUES"), 3103 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 3104 conflict=self._parse_on_conflict(), 3105 returning=returning or self._parse_returning(), 3106 overwrite=overwrite, 3107 alternative=alternative, 3108 ignore=ignore, 3109 source=self._match(TokenType.TABLE) and self._parse_table(), 3110 ) 3111 3112 def _parse_insert_table(self) -> t.Optional[exp.Expression]: 3113 this = self._parse_table(schema=True, parse_partition=True) 3114 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 3115 this.set("alias", self._parse_table_alias()) 3116 return this 3117 3118 def _parse_kill(self) -> exp.Kill: 3119 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 3120 3121 return self.expression( 3122 exp.Kill, 3123 this=self._parse_primary(), 3124 kind=kind, 3125 ) 3126 3127 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 3128 conflict = self._match_text_seq("ON", "CONFLICT") 3129 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 3130 3131 if not conflict and not duplicate: 3132 return None 3133 3134 conflict_keys = None 3135 constraint = None 3136 3137 if conflict: 3138 if self._match_text_seq("ON", "CONSTRAINT"): 3139 constraint = self._parse_id_var() 3140 elif self._match(TokenType.L_PAREN): 3141 conflict_keys = self._parse_csv(self._parse_id_var) 3142 self._match_r_paren() 3143 3144 index_predicate = self._parse_where() 3145 3146 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3147 if self._prev.token_type == TokenType.UPDATE: 3148 self._match(TokenType.SET) 3149 expressions = self._parse_csv(self._parse_equality) 3150 else: 3151 expressions = None 3152 3153 return self.expression( 3154 exp.OnConflict, 3155 duplicate=duplicate, 3156 expressions=expressions, 3157 action=action, 3158 conflict_keys=conflict_keys, 3159 index_predicate=index_predicate, 3160 constraint=constraint, 3161 where=self._parse_where(), 3162 ) 3163 3164 def _parse_returning(self) -> t.Optional[exp.Returning]: 3165 if not self._match(TokenType.RETURNING): 3166 return None 3167 return self.expression( 3168 exp.Returning, 3169 expressions=self._parse_csv(self._parse_expression), 3170 into=self._match(TokenType.INTO) and self._parse_table_part(), 3171 ) 3172 3173 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3174 if not self._match(TokenType.FORMAT): 3175 return None 3176 return self._parse_row_format() 3177 3178 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3179 index = self._index 3180 with_ = with_ or self._match_text_seq("WITH") 3181 3182 if not self._match(TokenType.SERDE_PROPERTIES): 3183 self._retreat(index) 3184 return None 3185 return self.expression( 3186 exp.SerdeProperties, 3187 expressions=self._parse_wrapped_properties(), 3188 with_=with_, 3189 ) 3190 3191 def _parse_row_format( 3192 self, match_row: bool = False 3193 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3194 if match_row and not 
self._match_pair(TokenType.ROW, TokenType.FORMAT): 3195 return None 3196 3197 if self._match_text_seq("SERDE"): 3198 this = self._parse_string() 3199 3200 serde_properties = self._parse_serde_properties() 3201 3202 return self.expression( 3203 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3204 ) 3205 3206 self._match_text_seq("DELIMITED") 3207 3208 kwargs = {} 3209 3210 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3211 kwargs["fields"] = self._parse_string() 3212 if self._match_text_seq("ESCAPED", "BY"): 3213 kwargs["escaped"] = self._parse_string() 3214 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3215 kwargs["collection_items"] = self._parse_string() 3216 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3217 kwargs["map_keys"] = self._parse_string() 3218 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3219 kwargs["lines"] = self._parse_string() 3220 if self._match_text_seq("NULL", "DEFINED", "AS"): 3221 kwargs["null"] = self._parse_string() 3222 3223 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3224 3225 def _parse_load(self) -> exp.LoadData | exp.Command: 3226 if self._match_text_seq("DATA"): 3227 local = self._match_text_seq("LOCAL") 3228 self._match_text_seq("INPATH") 3229 inpath = self._parse_string() 3230 overwrite = self._match(TokenType.OVERWRITE) 3231 self._match_pair(TokenType.INTO, TokenType.TABLE) 3232 3233 return self.expression( 3234 exp.LoadData, 3235 this=self._parse_table(schema=True), 3236 local=local, 3237 overwrite=overwrite, 3238 inpath=inpath, 3239 partition=self._parse_partition(), 3240 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3241 serde=self._match_text_seq("SERDE") and self._parse_string(), 3242 ) 3243 return self._parse_as_command(self._prev) 3244 3245 def _parse_delete(self) -> exp.Delete: 3246 # This handles MySQL's "Multiple-Table Syntax" 3247 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3248 tables = None 3249 if not self._match(TokenType.FROM, advance=False): 3250 tables = self._parse_csv(self._parse_table) or None 3251 3252 returning = self._parse_returning() 3253 3254 return self.expression( 3255 exp.Delete, 3256 tables=tables, 3257 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3258 using=self._match(TokenType.USING) 3259 and self._parse_csv(lambda: self._parse_table(joins=True)), 3260 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3261 where=self._parse_where(), 3262 returning=returning or self._parse_returning(), 3263 order=self._parse_order(), 3264 limit=self._parse_limit(), 3265 ) 3266 3267 def _parse_update(self) -> exp.Update: 3268 kwargs: t.Dict[str, t.Any] = { 3269 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3270 } 3271 while self._curr: 3272 if self._match(TokenType.SET): 3273 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3274 elif self._match(TokenType.RETURNING, advance=False): 3275 kwargs["returning"] = self._parse_returning() 3276 elif self._match(TokenType.FROM, advance=False): 3277 from_ = self._parse_from(joins=True) 3278 table = from_.this if from_ else None 3279 if isinstance(table, exp.Subquery) and self._match(TokenType.JOIN, advance=False): 3280 table.set("joins", list(self._parse_joins()) or None) 3281 3282 kwargs["from_"] = from_ 3283 elif self._match(TokenType.WHERE, advance=False): 3284 kwargs["where"] = self._parse_where() 3285 elif self._match(TokenType.ORDER_BY, advance=False): 3286 
kwargs["order"] = self._parse_order() 3287 elif self._match(TokenType.LIMIT, advance=False): 3288 kwargs["limit"] = self._parse_limit() 3289 else: 3290 break 3291 3292 return self.expression(exp.Update, **kwargs) 3293 3294 def _parse_use(self) -> exp.Use: 3295 return self.expression( 3296 exp.Use, 3297 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3298 this=self._parse_table(schema=False), 3299 ) 3300 3301 def _parse_uncache(self) -> exp.Uncache: 3302 if not self._match(TokenType.TABLE): 3303 self.raise_error("Expecting TABLE after UNCACHE") 3304 3305 return self.expression( 3306 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3307 ) 3308 3309 def _parse_cache(self) -> exp.Cache: 3310 lazy = self._match_text_seq("LAZY") 3311 self._match(TokenType.TABLE) 3312 table = self._parse_table(schema=True) 3313 3314 options = [] 3315 if self._match_text_seq("OPTIONS"): 3316 self._match_l_paren() 3317 k = self._parse_string() 3318 self._match(TokenType.EQ) 3319 v = self._parse_string() 3320 options = [k, v] 3321 self._match_r_paren() 3322 3323 self._match(TokenType.ALIAS) 3324 return self.expression( 3325 exp.Cache, 3326 this=table, 3327 lazy=lazy, 3328 options=options, 3329 expression=self._parse_select(nested=True), 3330 ) 3331 3332 def _parse_partition(self) -> t.Optional[exp.Partition]: 3333 if not self._match_texts(self.PARTITION_KEYWORDS): 3334 return None 3335 3336 return self.expression( 3337 exp.Partition, 3338 subpartition=self._prev.text.upper() == "SUBPARTITION", 3339 expressions=self._parse_wrapped_csv(self._parse_disjunction), 3340 ) 3341 3342 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3343 def _parse_value_expression() -> t.Optional[exp.Expression]: 3344 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3345 return exp.var(self._prev.text.upper()) 3346 return self._parse_expression() 3347 3348 if self._match(TokenType.L_PAREN): 3349 expressions = self._parse_csv(_parse_value_expression) 3350 self._match_r_paren() 3351 return self.expression(exp.Tuple, expressions=expressions) 3352 3353 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3354 expression = self._parse_expression() 3355 if expression: 3356 return self.expression(exp.Tuple, expressions=[expression]) 3357 return None 3358 3359 def _parse_projections(self) -> t.List[exp.Expression]: 3360 return self._parse_expressions() 3361 3362 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3363 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3364 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3365 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3366 ) 3367 elif self._match(TokenType.FROM): 3368 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3369 # Support parentheses for duckdb FROM-first syntax 3370 select = self._parse_select(from_=from_) 3371 if select: 3372 if not select.args.get("from_"): 3373 select.set("from_", from_) 3374 this = select 3375 else: 3376 this = exp.select("*").from_(t.cast(exp.From, from_)) 3377 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3378 else: 3379 this = ( 3380 self._parse_table(consume_pipe=True) 3381 if table 3382 else self._parse_select(nested=True, parse_set_operation=False) 3383 ) 3384 3385 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3386 # in case a modifier (e.g. 
join) is following 3387 if table and isinstance(this, exp.Values) and this.alias: 3388 alias = this.args["alias"].pop() 3389 this = exp.Table(this=this, alias=alias) 3390 3391 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3392 3393 return this 3394 3395 def _parse_select( 3396 self, 3397 nested: bool = False, 3398 table: bool = False, 3399 parse_subquery_alias: bool = True, 3400 parse_set_operation: bool = True, 3401 consume_pipe: bool = True, 3402 from_: t.Optional[exp.From] = None, 3403 ) -> t.Optional[exp.Expression]: 3404 query = self._parse_select_query( 3405 nested=nested, 3406 table=table, 3407 parse_subquery_alias=parse_subquery_alias, 3408 parse_set_operation=parse_set_operation, 3409 ) 3410 3411 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3412 if not query and from_: 3413 query = exp.select("*").from_(from_) 3414 if isinstance(query, exp.Query): 3415 query = self._parse_pipe_syntax_query(query) 3416 query = query.subquery(copy=False) if query and table else query 3417 3418 return query 3419 3420 def _parse_select_query( 3421 self, 3422 nested: bool = False, 3423 table: bool = False, 3424 parse_subquery_alias: bool = True, 3425 parse_set_operation: bool = True, 3426 ) -> t.Optional[exp.Expression]: 3427 cte = self._parse_with() 3428 3429 if cte: 3430 this = self._parse_statement() 3431 3432 if not this: 3433 self.raise_error("Failed to parse any statement following CTE") 3434 return cte 3435 3436 while isinstance(this, exp.Subquery) and this.is_wrapper: 3437 this = this.this 3438 3439 if "with_" in this.arg_types: 3440 this.set("with_", cte) 3441 else: 3442 self.raise_error(f"{this.key} does not support CTE") 3443 this = cte 3444 3445 return this 3446 3447 # duckdb supports leading with FROM x 3448 from_ = ( 3449 self._parse_from(joins=True, consume_pipe=True) 3450 if self._match(TokenType.FROM, advance=False) 3451 else None 3452 ) 3453 3454 if self._match(TokenType.SELECT): 3455 comments = self._prev_comments 3456 3457 hint = self._parse_hint() 3458 3459 if self._next and not self._next.token_type == TokenType.DOT: 3460 all_ = self._match(TokenType.ALL) 3461 distinct = self._match_set(self.DISTINCT_TOKENS) 3462 else: 3463 all_, distinct = None, None 3464 3465 kind = ( 3466 self._match(TokenType.ALIAS) 3467 and self._match_texts(("STRUCT", "VALUE")) 3468 and self._prev.text.upper() 3469 ) 3470 3471 if distinct: 3472 distinct = self.expression( 3473 exp.Distinct, 3474 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3475 ) 3476 3477 if all_ and distinct: 3478 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3479 3480 operation_modifiers = [] 3481 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3482 operation_modifiers.append(exp.var(self._prev.text.upper())) 3483 3484 limit = self._parse_limit(top=True) 3485 projections = self._parse_projections() 3486 3487 this = self.expression( 3488 exp.Select, 3489 kind=kind, 3490 hint=hint, 3491 distinct=distinct, 3492 expressions=projections, 3493 limit=limit, 3494 operation_modifiers=operation_modifiers or None, 3495 ) 3496 this.comments = comments 3497 3498 into = self._parse_into() 3499 if into: 3500 this.set("into", into) 3501 3502 if not from_: 3503 from_ = self._parse_from() 3504 3505 if from_: 3506 this.set("from_", from_) 3507 3508 this = self._parse_query_modifiers(this) 3509 elif (table or nested) and self._match(TokenType.L_PAREN): 3510 this = self._parse_wrapped_select(table=table) 3511 3512 # We return early here so that 
the UNION isn't attached to the subquery by the 3513 # following call to _parse_set_operations, but instead becomes the parent node 3514 self._match_r_paren() 3515 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3516 elif self._match(TokenType.VALUES, advance=False): 3517 this = self._parse_derived_table_values() 3518 elif from_: 3519 this = exp.select("*").from_(from_.this, copy=False) 3520 elif self._match(TokenType.SUMMARIZE): 3521 table = self._match(TokenType.TABLE) 3522 this = self._parse_select() or self._parse_string() or self._parse_table() 3523 return self.expression(exp.Summarize, this=this, table=table) 3524 elif self._match(TokenType.DESCRIBE): 3525 this = self._parse_describe() 3526 else: 3527 this = None 3528 3529 return self._parse_set_operations(this) if parse_set_operation else this 3530 3531 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3532 self._match_text_seq("SEARCH") 3533 3534 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3535 3536 if not kind: 3537 return None 3538 3539 self._match_text_seq("FIRST", "BY") 3540 3541 return self.expression( 3542 exp.RecursiveWithSearch, 3543 kind=kind, 3544 this=self._parse_id_var(), 3545 expression=self._match_text_seq("SET") and self._parse_id_var(), 3546 using=self._match_text_seq("USING") and self._parse_id_var(), 3547 ) 3548 3549 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3550 if not skip_with_token and not self._match(TokenType.WITH): 3551 return None 3552 3553 comments = self._prev_comments 3554 recursive = self._match(TokenType.RECURSIVE) 3555 3556 last_comments = None 3557 expressions = [] 3558 while True: 3559 cte = self._parse_cte() 3560 if isinstance(cte, exp.CTE): 3561 expressions.append(cte) 3562 if last_comments: 3563 cte.add_comments(last_comments) 3564 3565 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3566 break 3567 else: 3568 self._match(TokenType.WITH) 3569 3570 last_comments = self._prev_comments 3571 3572 return self.expression( 3573 exp.With, 3574 comments=comments, 3575 expressions=expressions, 3576 recursive=recursive, 3577 search=self._parse_recursive_with_search(), 3578 ) 3579 3580 def _parse_cte(self) -> t.Optional[exp.CTE]: 3581 index = self._index 3582 3583 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3584 if not alias or not alias.this: 3585 self.raise_error("Expected CTE to have alias") 3586 3587 key_expressions = ( 3588 self._parse_wrapped_id_vars() if self._match_text_seq("USING", "KEY") else None 3589 ) 3590 3591 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3592 self._retreat(index) 3593 return None 3594 3595 comments = self._prev_comments 3596 3597 if self._match_text_seq("NOT", "MATERIALIZED"): 3598 materialized = False 3599 elif self._match_text_seq("MATERIALIZED"): 3600 materialized = True 3601 else: 3602 materialized = None 3603 3604 cte = self.expression( 3605 exp.CTE, 3606 this=self._parse_wrapped(self._parse_statement), 3607 alias=alias, 3608 materialized=materialized, 3609 key_expressions=key_expressions, 3610 comments=comments, 3611 ) 3612 3613 values = cte.this 3614 if isinstance(values, exp.Values): 3615 if values.alias: 3616 cte.set("this", exp.select("*").from_(values)) 3617 else: 3618 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3619 3620 return cte 3621 3622 def _parse_table_alias( 3623 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3624 ) -> 
t.Optional[exp.TableAlias]: 3625 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3626 # so this section tries to parse the clause version and if it fails, it treats the token 3627 # as an identifier (alias) 3628 if self._can_parse_limit_or_offset(): 3629 return None 3630 3631 any_token = self._match(TokenType.ALIAS) 3632 alias = ( 3633 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3634 or self._parse_string_as_identifier() 3635 ) 3636 3637 index = self._index 3638 if self._match(TokenType.L_PAREN): 3639 columns = self._parse_csv(self._parse_function_parameter) 3640 self._match_r_paren() if columns else self._retreat(index) 3641 else: 3642 columns = None 3643 3644 if not alias and not columns: 3645 return None 3646 3647 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3648 3649 # We bubble up comments from the Identifier to the TableAlias 3650 if isinstance(alias, exp.Identifier): 3651 table_alias.add_comments(alias.pop_comments()) 3652 3653 return table_alias 3654 3655 def _parse_subquery( 3656 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3657 ) -> t.Optional[exp.Subquery]: 3658 if not this: 3659 return None 3660 3661 return self.expression( 3662 exp.Subquery, 3663 this=this, 3664 pivots=self._parse_pivots(), 3665 alias=self._parse_table_alias() if parse_alias else None, 3666 sample=self._parse_table_sample(), 3667 ) 3668 3669 def _implicit_unnests_to_explicit(self, this: E) -> E: 3670 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3671 3672 refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name} 3673 for i, join in enumerate(this.args.get("joins") or []): 3674 table = join.this 3675 normalized_table = table.copy() 3676 normalized_table.meta["maybe_column"] = True 3677 normalized_table = _norm(normalized_table, dialect=self.dialect) 3678 3679 if isinstance(table, exp.Table) and not join.args.get("on"): 3680 if normalized_table.parts[0].name in refs: 3681 table_as_column = table.to_column() 3682 unnest = exp.Unnest(expressions=[table_as_column]) 3683 3684 # Table.to_column creates a parent Alias node that we want to convert to 3685 # a TableAlias and attach to the Unnest, so it matches the parser's output 3686 if isinstance(table.args.get("alias"), exp.TableAlias): 3687 table_as_column.replace(table_as_column.this) 3688 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3689 3690 table.replace(unnest) 3691 3692 refs.add(normalized_table.alias_or_name) 3693 3694 return this 3695 3696 @t.overload 3697 def _parse_query_modifiers(self, this: E) -> E: ... 3698 3699 @t.overload 3700 def _parse_query_modifiers(self, this: None) -> None: ... 
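
    # Illustrative note (editor's addition, not in the original source): the two
    # overloads above tell type-checkers that _parse_query_modifiers preserves its
    # argument type: an expression in yields the same expression type out, and None
    # stays None. A minimal sketch of the effect, via the public API that funnels
    # into this method:
    #
    #   >>> import sqlglot
    #   >>> select = sqlglot.parse_one("SELECT a FROM t WHERE a > 0 LIMIT 5")
    #
    # The WHERE and LIMIT modifiers end up attached to the Select node's args.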

    def _parse_query_modifiers(self, this):
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.get("offset")
                            expression.set("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                    continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_stream(self) -> t.Optional[exp.Stream]:
        index = self._index
        if self._match_text_seq("STREAM"):
            this = self._try_parse(self._parse_table)
            if this:
                return self.expression(exp.Stream, this=this)

        self._retreat(index)
        return None

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        directed = self._match_text_seq("DIRECTED")
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text.upper()
        if side:
            kwargs["side"] = side.text.upper()
        if kind:
            kwargs["kind"] = kind.text.upper()
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_disjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
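            # Editor's note (not in the original source): this branch handles joins
            # whose ON clause is "nested", e.g. SELECT * FROM a JOIN b JOIN c ON b.x = c.x
            # ON a.x = b.x; the surrounding guards exclude APPLY, UNNEST, CROSS and ARRAY
            # joins, for which that reading cannot apply.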
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_disjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments

        if (
            self.ADD_JOIN_ON_TRUE
            and not kwargs.get("on")
            and not kwargs.get("using")
            and not kwargs.get("method")
            and kwargs.get("kind") in (None, "INNER", "OUTER")
        ):
            kwargs["on"] = exp.true()

        if directed:
            kwargs["directed"] = directed

        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        stream = self._parse_stream()
        if stream:
            return stream

        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
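        # Illustrative note (editor's addition, not in the original source):
        # _parse_table tries the more specific table-like constructs first (STREAM,
        # LATERAL, UNNEST, VALUES, subqueries) and only then falls through to plain
        # table references below. For example,
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT * FROM (VALUES (1)) AS v(x)")
        #
        # reaches the exp.Values branch rather than building an exp.Table.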
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match(TokenType.INDEXED_BY):
            this.set("indexed", self._parse_table_parts())
        elif self._match_text_seq("NOT", "INDEXED"):
            this.set("indexed", False)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False):
            return None

        self._advance()

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
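            # Editor's note (not in the original source): numeric sizes go through
            # _parse_factor so that a trailing % (tokenized as MOD) is not folded
            # into the number; the branches below then decide whether `num` is a
            # percentage, a row count, or a size.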
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_column())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.ANY):
                exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
            else:
                exprs = self._parse_csv(_parse_aliased_expression)
            self._match_r_paren()
            return self.expression(exp.In, this=value, expressions=exprs)

        return self.expression(exp.In, this=value, field=self._parse_id_var())

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        func = self._parse_function()
        if not func:
            if self._prev and self._prev.token_type == TokenType.COMMA:
                return None
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

                pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_disjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_disjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_disjunction()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix):
                key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube"
                elements[key].append(cube_or_rollup)
            elif grouping_sets := self._parse_grouping_sets():
                elements["grouping_sets"].append(grouping_sets)
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, with_prefix: bool = False) -> t.Optional[exp.Cube | exp.Rollup]:
        if self._match(TokenType.CUBE):
            kind: t.Type[exp.Cube | exp.Rollup] = exp.Cube
        elif self._match(TokenType.ROLLUP):
            kind = exp.Rollup
        else:
            return None

        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise)
        )

    def _parse_grouping_sets(self) -> t.Optional[exp.GroupingSets]:
        if self._match(TokenType.GROUPING_SETS):
            return self.expression(
                exp.GroupingSets, expressions=self._parse_wrapped_csv(self._parse_grouping_set)
            )
        return None

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_disjunction()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_disjunction())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_disjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_disjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_disjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_disjunction())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_disjunction()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                from_=self._match(TokenType.FROM) and self._parse_bitwise(),
                to=self._match_text_seq("TO") and self._parse_bitwise(),
                step=self._match_text_seq("STEP") and self._parse_bitwise(),
                interpolate=self._parse_interpolate(),
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> t.Optional[exp.LimitOptions]:
        percent = self._match_set((TokenType.PERCENT, TokenType.MOD))
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")

        if not (percent or rows or with_ties):
            return None

        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                # Parsing LIMIT x% (i.e. x PERCENT) as a term leads to an error, since
                # we try to build an exp.Mod expr. For that matter, we backtrack and instead
                # consume the factor plus parse the percentage separately
                index = self._index
                expression = self._try_parse(self._parse_term)
                if isinstance(expression, exp.Mod):
                    self._retreat(index)
                    expression = self._parse_factor()
                elif not expression:
                    expression = self._parse_factor()

            limit_options = self._parse_limit_options()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)

        # MATCH_CONDITION (...) is a special construct that should not be consumed by limit/offset
        if self._next and self._next.token_type == TokenType.MATCH_CONDITION:
            result = False

        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
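
        # Illustrative example (editor's addition, not in the original source):
        # with a dialect that tokenizes ISNULL, such as Postgres, a predicate like
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT x ISNULL", read="postgres")
        #
        # produces the exp.Is(this=x, expression=exp.Null()) node built above.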
5196 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5197 if self._match(TokenType.NOTNULL): 5198 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5199 this = self.expression(exp.Not, this=this) 5200 5201 if negate: 5202 this = self._negate_range(this) 5203 5204 if self._match(TokenType.IS): 5205 this = self._parse_is(this) 5206 5207 return this 5208 5209 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5210 if not this: 5211 return this 5212 5213 return self.expression(exp.Not, this=this) 5214 5215 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5216 index = self._index - 1 5217 negate = self._match(TokenType.NOT) 5218 5219 if self._match_text_seq("DISTINCT", "FROM"): 5220 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5221 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5222 5223 if self._match(TokenType.JSON): 5224 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5225 5226 if self._match_text_seq("WITH"): 5227 _with = True 5228 elif self._match_text_seq("WITHOUT"): 5229 _with = False 5230 else: 5231 _with = None 5232 5233 unique = self._match(TokenType.UNIQUE) 5234 self._match_text_seq("KEYS") 5235 expression: t.Optional[exp.Expression] = self.expression( 5236 exp.JSON, 5237 this=kind, 5238 with_=_with, 5239 unique=unique, 5240 ) 5241 else: 5242 expression = self._parse_null() or self._parse_bitwise() 5243 if not expression: 5244 self._retreat(index) 5245 return None 5246 5247 this = self.expression(exp.Is, this=this, expression=expression) 5248 this = self.expression(exp.Not, this=this) if negate else this 5249 return self._parse_column_ops(this) 5250 5251 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5252 unnest = self._parse_unnest(with_alias=False) 5253 if unnest: 5254 this = self.expression(exp.In, this=this, unnest=unnest) 5255 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5256 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5257 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5258 5259 if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query): 5260 this = self.expression( 5261 exp.In, 5262 this=this, 5263 query=self._parse_query_modifiers(query).subquery(copy=False), 5264 ) 5265 else: 5266 this = self.expression(exp.In, this=this, expressions=expressions) 5267 5268 if matched_l_paren: 5269 self._match_r_paren(this) 5270 elif not self._match(TokenType.R_BRACKET, expression=this): 5271 self.raise_error("Expecting ]") 5272 else: 5273 this = self.expression(exp.In, this=this, field=self._parse_column()) 5274 5275 return this 5276 5277 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5278 symmetric = None 5279 if self._match_text_seq("SYMMETRIC"): 5280 symmetric = True 5281 elif self._match_text_seq("ASYMMETRIC"): 5282 symmetric = False 5283 5284 low = self._parse_bitwise() 5285 self._match(TokenType.AND) 5286 high = self._parse_bitwise() 5287 5288 return self.expression( 5289 exp.Between, 5290 this=this, 5291 low=low, 5292 high=high, 5293 symmetric=symmetric, 5294 ) 5295 5296 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5297 if not self._match(TokenType.ESCAPE): 5298 return this 5299 return self.expression( 5300 exp.Escape, this=this, expression=self._parse_string() or self._parse_null() 5301 ) 5302 5303 def 
_parse_interval_span(self, this: exp.Expression) -> exp.Interval: 5304 # handle day-time format interval span with omitted units: 5305 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5306 interval_span_units_omitted = None 5307 if ( 5308 this 5309 and this.is_string 5310 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5311 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5312 ): 5313 index = self._index 5314 5315 # Var "TO" Var 5316 first_unit = self._parse_var(any_token=True, upper=True) 5317 second_unit = None 5318 if first_unit and self._match_text_seq("TO"): 5319 second_unit = self._parse_var(any_token=True, upper=True) 5320 5321 interval_span_units_omitted = not (first_unit and second_unit) 5322 5323 self._retreat(index) 5324 5325 unit = ( 5326 None 5327 if interval_span_units_omitted 5328 else ( 5329 self._parse_function() 5330 or ( 5331 not self._match(TokenType.ALIAS, advance=False) 5332 and self._parse_var(any_token=True, upper=True) 5333 ) 5334 ) 5335 ) 5336 5337 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5338 # each INTERVAL expression into this canonical form so it's easy to transpile 5339 if this and this.is_number: 5340 this = exp.Literal.string(this.to_py()) 5341 elif this and this.is_string: 5342 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5343 if parts and unit: 5344 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5345 unit = None 5346 self._retreat(self._index - 1) 5347 5348 if len(parts) == 1: 5349 this = exp.Literal.string(parts[0][0]) 5350 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5351 5352 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5353 unit = self.expression( 5354 exp.IntervalSpan, 5355 this=unit, 5356 expression=self._parse_function() or self._parse_var(any_token=True, upper=True), 5357 ) 5358 5359 return self.expression(exp.Interval, this=this, unit=unit) 5360 5361 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5362 index = self._index 5363 5364 if not self._match(TokenType.INTERVAL) and match_interval: 5365 return None 5366 5367 if self._match(TokenType.STRING, advance=False): 5368 this = self._parse_primary() 5369 else: 5370 this = self._parse_term() 5371 5372 if not this or ( 5373 isinstance(this, exp.Column) 5374 and not this.table 5375 and not this.this.quoted 5376 and self._curr 5377 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5378 ): 5379 self._retreat(index) 5380 return None 5381 5382 interval = self._parse_interval_span(this) 5383 5384 index = self._index 5385 self._match(TokenType.PLUS) 5386 5387 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5388 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5389 return self.expression( 5390 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5391 ) 5392 5393 self._retreat(index) 5394 return interval 5395 5396 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5397 this = self._parse_term() 5398 5399 while True: 5400 if self._match_set(self.BITWISE): 5401 this = self.expression( 5402 self.BITWISE[self._prev.token_type], 5403 this=this, 5404 expression=self._parse_term(), 5405 ) 5406 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5407 this = self.expression( 5408 exp.DPipe, 5409 this=this, 5410 expression=self._parse_term(), 5411 safe=not self.dialect.STRICT_STRING_CONCAT, 5412 ) 5413 elif self._match(TokenType.DQMARK): 5414 this = self.expression( 5415 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5416 ) 5417 elif self._match_pair(TokenType.LT, TokenType.LT): 5418 this = self.expression( 5419 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5420 ) 5421 elif self._match_pair(TokenType.GT, TokenType.GT): 5422 this = self.expression( 5423 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5424 ) 5425 else: 5426 break 5427 5428 return this 5429 5430 def _parse_term(self) -> t.Optional[exp.Expression]: 5431 this = self._parse_factor() 5432 5433 while self._match_set(self.TERM): 5434 klass = self.TERM[self._prev.token_type] 5435 comments = self._prev_comments 5436 expression = self._parse_factor() 5437 5438 this = self.expression(klass, this=this, comments=comments, expression=expression) 5439 5440 if isinstance(this, exp.Collate): 5441 expr = this.expression 5442 5443 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5444 # fallback to Identifier / Var 5445 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5446 ident = expr.this 5447 if isinstance(ident, exp.Identifier): 5448 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5449 5450 return this 5451 5452 def _parse_factor(self) -> t.Optional[exp.Expression]: 5453 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5454 this = self._parse_at_time_zone(parse_method()) 5455 5456 while self._match_set(self.FACTOR): 5457 klass = self.FACTOR[self._prev.token_type] 5458 comments = self._prev_comments 5459 expression = parse_method() 5460 5461 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5462 self._retreat(self._index - 1) 5463 return this 5464 5465 this = self.expression(klass, this=this, comments=comments, expression=expression) 5466 5467 if isinstance(this, exp.Div): 5468 this.set("typed", self.dialect.TYPED_DIVISION) 5469 this.set("safe", self.dialect.SAFE_DIVISION) 5470 5471 return this 5472 5473 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5474 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5475 5476 def _parse_unary(self) -> t.Optional[exp.Expression]: 5477 if self._match_set(self.UNARY_PARSERS): 5478 return self.UNARY_PARSERS[self._prev.token_type](self) 5479 return self._parse_type() 5480 5481 def _parse_type( 5482 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5483 ) -> t.Optional[exp.Expression]: 5484 interval = parse_interval and self._parse_interval() 5485 if interval: 5486 return self._parse_column_ops(interval) 5487 5488 index = self._index 5489 data_type = self._parse_types(check_func=True, 
allow_identifiers=False) 5490 5491 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5492 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5493 if isinstance(data_type, exp.Cast): 5494 # This constructor can contain ops directly after it, for instance struct unnesting: 5495 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5496 return self._parse_column_ops(data_type) 5497 5498 if data_type: 5499 index2 = self._index 5500 this = self._parse_primary() 5501 5502 if isinstance(this, exp.Literal): 5503 literal = this.name 5504 this = self._parse_column_ops(this) 5505 5506 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5507 if parser: 5508 return parser(self, this, data_type) 5509 5510 if ( 5511 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5512 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5513 and TIME_ZONE_RE.search(literal) 5514 ): 5515 data_type = exp.DataType.build("TIMESTAMPTZ") 5516 5517 return self.expression(exp.Cast, this=this, to=data_type) 5518 5519 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5520 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5521 # 5522 # If the index difference here is greater than 1, that means the parser itself must have 5523 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5524 # 5525 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5526 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5527 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5528 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5529 # 5530 # In these cases, we don't really want to return the converted type, but instead retreat 5531 # and try to parse a Column or Identifier in the section below.
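# Illustrative sketch (not part of the original source): the Snowflake
# TYPE_CONVERTERS expansion mentioned above can be observed via transpilation;
# the expected output assumes Snowflake's default DECIMAL precision/scale:
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT CAST(x AS DECIMAL)", read="snowflake", write="snowflake")[0]
#   'SELECT CAST(x AS DECIMAL(38, 0))'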
5532 if data_type.expressions and index2 - index > 1: 5533 self._retreat(index2) 5534 return self._parse_column_ops(data_type) 5535 5536 self._retreat(index) 5537 5538 if fallback_to_identifier: 5539 return self._parse_id_var() 5540 5541 this = self._parse_column() 5542 return this and self._parse_column_ops(this) 5543 5544 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5545 this = self._parse_type() 5546 if not this: 5547 return None 5548 5549 if isinstance(this, exp.Column) and not this.table: 5550 this = exp.var(this.name.upper()) 5551 5552 return self.expression( 5553 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5554 ) 5555 5556 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5557 type_name = identifier.name 5558 5559 while self._match(TokenType.DOT): 5560 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5561 5562 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5563 5564 def _parse_types( 5565 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5566 ) -> t.Optional[exp.Expression]: 5567 index = self._index 5568 5569 this: t.Optional[exp.Expression] = None 5570 prefix = self._match_text_seq("SYSUDTLIB", ".") 5571 5572 if self._match_set(self.TYPE_TOKENS): 5573 type_token = self._prev.token_type 5574 else: 5575 type_token = None 5576 identifier = allow_identifiers and self._parse_id_var( 5577 any_token=False, tokens=(TokenType.VAR,) 5578 ) 5579 if isinstance(identifier, exp.Identifier): 5580 try: 5581 tokens = self.dialect.tokenize(identifier.name) 5582 except TokenError: 5583 tokens = None 5584 5585 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5586 type_token = tokens[0].token_type 5587 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5588 this = self._parse_user_defined_type(identifier) 5589 else: 5590 self._retreat(self._index - 1) 5591 return None 5592 else: 5593 return None 5594 5595 if type_token == TokenType.PSEUDO_TYPE: 5596 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5597 5598 if type_token == TokenType.OBJECT_IDENTIFIER: 5599 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5600 5601 # https://materialize.com/docs/sql/types/map/ 5602 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5603 key_type = self._parse_types( 5604 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5605 ) 5606 if not self._match(TokenType.FARROW): 5607 self._retreat(index) 5608 return None 5609 5610 value_type = self._parse_types( 5611 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5612 ) 5613 if not self._match(TokenType.R_BRACKET): 5614 self._retreat(index) 5615 return None 5616 5617 return exp.DataType( 5618 this=exp.DataType.Type.MAP, 5619 expressions=[key_type, value_type], 5620 nested=True, 5621 prefix=prefix, 5622 ) 5623 5624 nested = type_token in self.NESTED_TYPE_TOKENS 5625 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5626 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5627 expressions = None 5628 maybe_func = False 5629 5630 if self._match(TokenType.L_PAREN): 5631 if is_struct: 5632 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5633 elif nested: 5634 expressions = self._parse_csv( 5635 lambda: self._parse_types( 5636 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5637 ) 5638 ) 5639 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5640 this = expressions[0] 5641 this.set("nullable", True) 5642 self._match_r_paren() 5643 return this 5644 elif type_token in self.ENUM_TYPE_TOKENS: 5645 expressions = self._parse_csv(self._parse_equality) 5646 elif is_aggregate: 5647 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5648 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5649 ) 5650 if not func_or_ident: 5651 return None 5652 expressions = [func_or_ident] 5653 if self._match(TokenType.COMMA): 5654 expressions.extend( 5655 self._parse_csv( 5656 lambda: self._parse_types( 5657 check_func=check_func, 5658 schema=schema, 5659 allow_identifiers=allow_identifiers, 5660 ) 5661 ) 5662 ) 5663 else: 5664 expressions = self._parse_csv(self._parse_type_size) 5665 5666 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5667 if type_token == TokenType.VECTOR and len(expressions) == 2: 5668 expressions = self._parse_vector_expressions(expressions) 5669 5670 if not self._match(TokenType.R_PAREN): 5671 self._retreat(index) 5672 return None 5673 5674 maybe_func = True 5675 5676 values: t.Optional[t.List[exp.Expression]] = None 5677 5678 if nested and self._match(TokenType.LT): 5679 if is_struct: 5680 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5681 else: 5682 expressions = self._parse_csv( 5683 lambda: self._parse_types( 5684 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5685 ) 5686 ) 5687 5688 if not self._match(TokenType.GT): 5689 self.raise_error("Expecting >") 5690 5691 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5692 values = self._parse_csv(self._parse_disjunction) 5693 if not values and is_struct: 5694 values = None 5695 self._retreat(self._index - 1) 5696 else: 5697 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5698 5699 if type_token in self.TIMESTAMPS: 5700 if self._match_text_seq("WITH", "TIME", "ZONE"): 5701 maybe_func = False 5702 tz_type = ( 5703 exp.DataType.Type.TIMETZ 5704 if type_token in self.TIMES 5705 else exp.DataType.Type.TIMESTAMPTZ 5706 ) 5707 this = exp.DataType(this=tz_type, expressions=expressions) 5708 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5709 maybe_func = False 5710 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5711 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5712 maybe_func = False 5713 elif type_token == TokenType.INTERVAL: 5714 if self._curr and self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS: 5715 unit = self._parse_var(upper=True) 5716 if self._match_text_seq("TO"): 5717 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5718 5719 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5720 else: 5721 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5722 elif type_token == TokenType.VOID: 5723 this = exp.DataType(this=exp.DataType.Type.NULL) 5724 5725 if maybe_func and check_func: 5726 index2 = self._index 5727 peek = self._parse_string() 5728 5729 if not peek: 5730 self._retreat(index) 5731 return None 5732 5733 self._retreat(index2) 5734 5735 if not this: 5736 if self._match_text_seq("UNSIGNED"): 5737 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5738 if not unsigned_type_token: 5739 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5740 5741 type_token = unsigned_type_token or type_token 5742 5743 # NULLABLE without 
parentheses can be a column (Presto/Trino) 5744 if type_token == TokenType.NULLABLE and not expressions: 5745 self._retreat(index) 5746 return None 5747 5748 this = exp.DataType( 5749 this=exp.DataType.Type[type_token.value], 5750 expressions=expressions, 5751 nested=nested, 5752 prefix=prefix, 5753 ) 5754 5755 # Empty arrays/structs are allowed 5756 if values is not None: 5757 cls = exp.Struct if is_struct else exp.Array 5758 this = exp.cast(cls(expressions=values), this, copy=False) 5759 5760 elif expressions: 5761 this.set("expressions", expressions) 5762 5763 # https://materialize.com/docs/sql/types/list/#type-name 5764 while self._match(TokenType.LIST): 5765 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5766 5767 index = self._index 5768 5769 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5770 matched_array = self._match(TokenType.ARRAY) 5771 5772 while self._curr: 5773 datatype_token = self._prev.token_type 5774 matched_l_bracket = self._match(TokenType.L_BRACKET) 5775 5776 if (not matched_l_bracket and not matched_array) or ( 5777 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5778 ): 5779 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5780 # not to be confused with the fixed size array parsing 5781 break 5782 5783 matched_array = False 5784 values = self._parse_csv(self._parse_disjunction) or None 5785 if ( 5786 values 5787 and not schema 5788 and ( 5789 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS 5790 or datatype_token == TokenType.ARRAY 5791 or not self._match(TokenType.R_BRACKET, advance=False) 5792 ) 5793 ): 5794 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5795 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5796 self._retreat(index) 5797 break 5798 5799 this = exp.DataType( 5800 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5801 ) 5802 self._match(TokenType.R_BRACKET) 5803 5804 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5805 converter = self.TYPE_CONVERTERS.get(this.this) 5806 if converter: 5807 this = converter(t.cast(exp.DataType, this)) 5808 5809 return this 5810 5811 def _parse_vector_expressions( 5812 self, expressions: t.List[exp.Expression] 5813 ) -> t.List[exp.Expression]: 5814 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5815 5816 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5817 index = self._index 5818 5819 if ( 5820 self._curr 5821 and self._next 5822 and self._curr.token_type in self.TYPE_TOKENS 5823 and self._next.token_type in self.TYPE_TOKENS 5824 ): 5825 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5826 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5827 this = self._parse_id_var() 5828 else: 5829 this = ( 5830 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5831 or self._parse_id_var() 5832 ) 5833 5834 self._match(TokenType.COLON) 5835 5836 if ( 5837 type_required 5838 and not isinstance(this, exp.DataType) 5839 and not self._match_set(self.TYPE_TOKENS, advance=False) 5840 ): 5841 self._retreat(index) 5842 return self._parse_types() 5843 5844 return self._parse_column_def(this) 5845 5846 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5847 if not self._match_text_seq("AT", "TIME", "ZONE"): 5848 return this 5849 return self._parse_at_time_zone( 5850 self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5851 ) 5852 5853 def _parse_column(self) -> t.Optional[exp.Expression]: 5854 this = self._parse_column_reference() 5855 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5856 5857 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5858 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5859 5860 return column 5861 5862 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5863 this = self._parse_field() 5864 if ( 5865 not this 5866 and self._match(TokenType.VALUES, advance=False) 5867 and self.VALUES_FOLLOWED_BY_PAREN 5868 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5869 ): 5870 this = self._parse_id_var() 5871 5872 if isinstance(this, exp.Identifier): 5873 # We bubble up comments from the Identifier to the Column 5874 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5875 5876 return this 5877 5878 def _parse_colon_as_variant_extract( 5879 self, this: t.Optional[exp.Expression] 5880 ) -> t.Optional[exp.Expression]: 5881 casts = [] 5882 json_path = [] 5883 escape = None 5884 5885 while self._match(TokenType.COLON): 5886 start_index = self._index 5887 5888 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5889 path = self._parse_column_ops( 5890 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5891 ) 5892 5893 # The cast :: operator has a lower precedence than the extraction operator :, so 5894 # we rearrange the AST appropriately to avoid casting the JSON path 5895 while isinstance(path, exp.Cast): 5896 casts.append(path.to) 5897 path = path.this 5898 5899 if casts: 5900 dcolon_offset = next( 5901 i 5902 for i, t in enumerate(self._tokens[start_index:]) 5903 if t.token_type == TokenType.DCOLON 5904 ) 5905 end_token = self._tokens[start_index + dcolon_offset - 1] 5906 else: 5907 end_token = self._prev 5908 5909 if path: 5910 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5911 # it'll roundtrip to a string literal in GET_PATH 5912 if isinstance(path, exp.Identifier) and path.quoted: 5913 escape = True 5914 5915 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5916 5917 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5918 # Databricks transforms it back to the colon/dot notation 5919 if json_path: 5920 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5921 5922 if json_path_expr: 5923 json_path_expr.set("escape", escape) 5924 5925 this = self.expression( 5926 exp.JSONExtract, 5927 this=this, 5928 expression=json_path_expr, 5929 variant_extract=True, 5930 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5931 ) 5932 5933 while casts: 5934 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5935 5936 return this 5937 5938 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5939 return self._parse_types() 5940 5941 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5942 this = self._parse_bracket(this) 5943 5944 while self._match_set(self.COLUMN_OPERATORS): 5945 op_token = self._prev.token_type 5946 op = self.COLUMN_OPERATORS.get(op_token) 5947 5948 if op_token in self.CAST_COLUMN_OPERATORS: 5949 field = self._parse_dcolon() 5950 if not field: 5951 self.raise_error("Expected type") 5952 elif op and self._curr: 5953 field = self._parse_column_reference() or self._parse_bitwise() 5954 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5955 field = self._parse_column_ops(field) 5956 else: 5957 field = self._parse_field(any_token=True, anonymous_func=True) 5958 5959 # Function calls can be qualified, e.g., x.y.FOO() 5960 # This converts the final AST to a series of Dots leading to the function call 5961 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5962 if isinstance(field, (exp.Func, exp.Window)) and this: 5963 this = this.transform( 5964 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5965 ) 5966 5967 if op: 5968 this = op(self, this, field) 5969 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5970 this = self.expression( 5971 exp.Column, 5972 comments=this.comments, 5973 this=field, 5974 table=this.this, 5975 db=this.args.get("table"), 5976 catalog=this.args.get("db"), 5977 ) 5978 elif isinstance(field, exp.Window): 5979 # Move the exp.Dot's to the window's function 5980 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5981 field.set("this", window_func) 5982 this = field 5983 else: 5984 this = self.expression(exp.Dot, this=this, expression=field) 5985 5986 if field and field.comments: 5987 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5988 5989 this = self._parse_bracket(this) 5990 5991 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5992 5993 def _parse_paren(self) -> t.Optional[exp.Expression]: 5994 if not self._match(TokenType.L_PAREN): 5995 return None 5996 5997 comments = self._prev_comments 5998 query = self._parse_select() 5999 6000 if query: 6001 expressions = [query] 6002 else: 6003 expressions = self._parse_expressions() 6004 6005 this = seq_get(expressions, 0) 6006 6007 if not this and self._match(TokenType.R_PAREN, advance=False): 6008 this = self.expression(exp.Tuple) 6009 elif isinstance(this, exp.UNWRAPPED_QUERIES): 6010 this = 
self._parse_subquery(this=this, parse_alias=False) 6011 elif isinstance(this, (exp.Subquery, exp.Values)): 6012 this = self._parse_subquery( 6013 this=self._parse_query_modifiers(self._parse_set_operations(this)), 6014 parse_alias=False, 6015 ) 6016 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 6017 this = self.expression(exp.Tuple, expressions=expressions) 6018 else: 6019 this = self.expression(exp.Paren, this=this) 6020 6021 if this: 6022 this.add_comments(comments) 6023 6024 self._match_r_paren(expression=this) 6025 6026 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 6027 return self._parse_window(this) 6028 6029 return this 6030 6031 def _parse_primary(self) -> t.Optional[exp.Expression]: 6032 if self._match_set(self.PRIMARY_PARSERS): 6033 token_type = self._prev.token_type 6034 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 6035 6036 if token_type == TokenType.STRING: 6037 expressions = [primary] 6038 while self._match(TokenType.STRING): 6039 expressions.append(exp.Literal.string(self._prev.text)) 6040 6041 if len(expressions) > 1: 6042 return self.expression( 6043 exp.Concat, expressions=expressions, coalesce=self.dialect.CONCAT_COALESCE 6044 ) 6045 6046 return primary 6047 6048 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 6049 return exp.Literal.number(f"0.{self._prev.text}") 6050 6051 return self._parse_paren() 6052 6053 def _parse_field( 6054 self, 6055 any_token: bool = False, 6056 tokens: t.Optional[t.Collection[TokenType]] = None, 6057 anonymous_func: bool = False, 6058 ) -> t.Optional[exp.Expression]: 6059 if anonymous_func: 6060 field = ( 6061 self._parse_function(anonymous=anonymous_func, any_token=any_token) 6062 or self._parse_primary() 6063 ) 6064 else: 6065 field = self._parse_primary() or self._parse_function( 6066 anonymous=anonymous_func, any_token=any_token 6067 ) 6068 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 6069 6070 def _parse_function( 6071 self, 6072 functions: t.Optional[t.Dict[str, t.Callable]] = None, 6073 anonymous: bool = False, 6074 optional_parens: bool = True, 6075 any_token: bool = False, 6076 ) -> t.Optional[exp.Expression]: 6077 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 6078 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 6079 fn_syntax = False 6080 if ( 6081 self._match(TokenType.L_BRACE, advance=False) 6082 and self._next 6083 and self._next.text.upper() == "FN" 6084 ): 6085 self._advance(2) 6086 fn_syntax = True 6087 6088 func = self._parse_function_call( 6089 functions=functions, 6090 anonymous=anonymous, 6091 optional_parens=optional_parens, 6092 any_token=any_token, 6093 ) 6094 6095 if fn_syntax: 6096 self._match(TokenType.R_BRACE) 6097 6098 return func 6099 6100 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 6101 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 6102 6103 def _parse_function_call( 6104 self, 6105 functions: t.Optional[t.Dict[str, t.Callable]] = None, 6106 anonymous: bool = False, 6107 optional_parens: bool = True, 6108 any_token: bool = False, 6109 ) -> t.Optional[exp.Expression]: 6110 if not self._curr: 6111 return None 6112 6113 comments = self._curr.comments 6114 prev = self._prev 6115 token = self._curr 6116 token_type = self._curr.token_type 6117 this = self._curr.text 6118 upper = this.upper() 6119 6120 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 6121 if optional_parens and parser and token_type not in 
self.INVALID_FUNC_NAME_TOKENS: 6122 self._advance() 6123 return self._parse_window(parser(self)) 6124 6125 if not self._next or self._next.token_type != TokenType.L_PAREN: 6126 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 6127 self._advance() 6128 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 6129 6130 return None 6131 6132 if any_token: 6133 if token_type in self.RESERVED_TOKENS: 6134 return None 6135 elif token_type not in self.FUNC_TOKENS: 6136 return None 6137 6138 self._advance(2) 6139 6140 parser = self.FUNCTION_PARSERS.get(upper) 6141 if parser and not anonymous: 6142 this = parser(self) 6143 else: 6144 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 6145 6146 if subquery_predicate: 6147 expr = None 6148 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 6149 expr = self._parse_select() 6150 self._match_r_paren() 6151 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 6152 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 6153 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 6154 self._advance(-1) 6155 expr = self._parse_bitwise() 6156 6157 if expr: 6158 return self.expression(subquery_predicate, comments=comments, this=expr) 6159 6160 if functions is None: 6161 functions = self.FUNCTIONS 6162 6163 function = functions.get(upper) 6164 known_function = function and not anonymous 6165 6166 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 6167 args = self._parse_function_args(alias) 6168 6169 post_func_comments = self._curr and self._curr.comments 6170 if known_function and post_func_comments: 6171 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 6172 # call we'll construct it as exp.Anonymous, even if it's "known" 6173 if any( 6174 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 6175 for comment in post_func_comments 6176 ): 6177 known_function = False 6178 6179 if alias and known_function: 6180 args = self._kv_to_prop_eq(args) 6181 6182 if known_function: 6183 func_builder = t.cast(t.Callable, function) 6184 6185 if "dialect" in func_builder.__code__.co_varnames: 6186 func = func_builder(args, dialect=self.dialect) 6187 else: 6188 func = func_builder(args) 6189 6190 func = self.validate_expression(func, args) 6191 if self.dialect.PRESERVE_ORIGINAL_NAMES: 6192 func.meta["name"] = this 6193 6194 this = func 6195 else: 6196 if token_type == TokenType.IDENTIFIER: 6197 this = exp.Identifier(this=this, quoted=True).update_positions(token) 6198 6199 this = self.expression(exp.Anonymous, this=this, expressions=args) 6200 6201 this = this.update_positions(token) 6202 6203 if isinstance(this, exp.Expression): 6204 this.add_comments(comments) 6205 6206 self._match_r_paren(this) 6207 return self._parse_window(this) 6208 6209 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 6210 return expression 6211 6212 def _kv_to_prop_eq( 6213 self, expressions: t.List[exp.Expression], parse_map: bool = False 6214 ) -> t.List[exp.Expression]: 6215 transformed = [] 6216 6217 for index, e in enumerate(expressions): 6218 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6219 if isinstance(e, exp.Alias): 6220 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 6221 6222 if not isinstance(e, exp.PropertyEQ): 6223 e = self.expression( 6224 exp.PropertyEQ, 6225 this=e.this if parse_map else exp.to_identifier(e.this.name), 6226 expression=e.expression, 6227 ) 6228 6229 
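# Illustrative note (not part of the original source): this loop normalizes
# key/value-style arguments into exp.PropertyEQ nodes, and the branch below
# unwraps keys that were parsed as columns into plain identifiers. Struct
# literals are one entry point, e.g. in DuckDB:
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> e = sqlglot.parse_one("SELECT {'a': 1}", read="duckdb")
#   >>> isinstance(e.selects[0], exp.Struct)
#   True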
if isinstance(e.this, exp.Column): 6230 e.this.replace(e.this.this) 6231 else: 6232 e = self._to_prop_eq(e, index) 6233 6234 transformed.append(e) 6235 6236 return transformed 6237 6238 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6239 return self._parse_statement() 6240 6241 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6242 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6243 6244 def _parse_user_defined_function( 6245 self, kind: t.Optional[TokenType] = None 6246 ) -> t.Optional[exp.Expression]: 6247 this = self._parse_table_parts(schema=True) 6248 6249 if not self._match(TokenType.L_PAREN): 6250 return this 6251 6252 expressions = self._parse_csv(self._parse_function_parameter) 6253 self._match_r_paren() 6254 return self.expression( 6255 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6256 ) 6257 6258 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6259 literal = self._parse_primary() 6260 if literal: 6261 return self.expression(exp.Introducer, token=token, expression=literal) 6262 6263 return self._identifier_expression(token) 6264 6265 def _parse_session_parameter(self) -> exp.SessionParameter: 6266 kind = None 6267 this = self._parse_id_var() or self._parse_primary() 6268 6269 if this and self._match(TokenType.DOT): 6270 kind = this.name 6271 this = self._parse_var() or self._parse_primary() 6272 6273 return self.expression(exp.SessionParameter, this=this, kind=kind) 6274 6275 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6276 return self._parse_id_var() 6277 6278 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6279 index = self._index 6280 6281 if self._match(TokenType.L_PAREN): 6282 expressions = t.cast( 6283 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6284 ) 6285 6286 if not self._match(TokenType.R_PAREN): 6287 self._retreat(index) 6288 else: 6289 expressions = [self._parse_lambda_arg()] 6290 6291 if self._match_set(self.LAMBDAS): 6292 return self.LAMBDAS[self._prev.token_type](self, expressions) 6293 6294 self._retreat(index) 6295 6296 this: t.Optional[exp.Expression] 6297 6298 if self._match(TokenType.DISTINCT): 6299 this = self.expression( 6300 exp.Distinct, expressions=self._parse_csv(self._parse_disjunction) 6301 ) 6302 else: 6303 this = self._parse_select_or_expression(alias=alias) 6304 6305 return self._parse_limit( 6306 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6307 ) 6308 6309 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6310 index = self._index 6311 if not self._match(TokenType.L_PAREN): 6312 return this 6313 6314 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6315 # expr can be of both types 6316 if self._match_set(self.SELECT_START_TOKENS): 6317 self._retreat(index) 6318 return this 6319 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6320 self._match_r_paren() 6321 return self.expression(exp.Schema, this=this, expressions=args) 6322 6323 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6324 return self._parse_column_def(self._parse_field(any_token=True)) 6325 6326 def _parse_column_def( 6327 self, this: t.Optional[exp.Expression], computed_column: bool = True 6328 ) -> t.Optional[exp.Expression]: 6329 # column defs are not really columns, they're identifiers 6330 if isinstance(this, exp.Column): 6331 this = this.this 6332 6333 if not computed_column: 6334 self._match(TokenType.ALIAS) 6335 6336 kind = self._parse_types(schema=True) 6337 6338 if self._match_text_seq("FOR", "ORDINALITY"): 6339 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6340 6341 constraints: t.List[exp.Expression] = [] 6342 6343 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6344 ("ALIAS", "MATERIALIZED") 6345 ): 6346 persisted = self._prev.text.upper() == "MATERIALIZED" 6347 constraint_kind = exp.ComputedColumnConstraint( 6348 this=self._parse_disjunction(), 6349 persisted=persisted or self._match_text_seq("PERSISTED"), 6350 data_type=exp.Var(this="AUTO") 6351 if self._match_text_seq("AUTO") 6352 else self._parse_types(), 6353 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6354 ) 6355 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6356 elif not kind and self._match_set({TokenType.IN, TokenType.OUT}, advance=False): 6357 in_out_constraint = self.expression( 6358 exp.InOutColumnConstraint, 6359 input_=self._match(TokenType.IN), 6360 output=self._match(TokenType.OUT), 6361 ) 6362 constraints.append(in_out_constraint) 6363 kind = self._parse_types() 6364 elif ( 6365 kind 6366 and self._match(TokenType.ALIAS, advance=False) 6367 and ( 6368 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6369 or (self._next and self._next.token_type == TokenType.L_PAREN) 6370 ) 6371 ): 6372 self._advance() 6373 constraints.append( 6374 self.expression( 6375 exp.ColumnConstraint, 6376 kind=exp.ComputedColumnConstraint( 6377 this=self._parse_disjunction(), 6378 persisted=self._match_texts(("STORED", "VIRTUAL")) 6379 and self._prev.text.upper() == "STORED", 6380 ), 6381 ) 6382 ) 6383 6384 while True: 6385 constraint = self._parse_column_constraint() 6386 if not constraint: 6387 break 6388 constraints.append(constraint) 6389 6390 if not kind and not constraints: 6391 return this 6392 6393 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6394 6395 def _parse_auto_increment( 6396 self, 6397 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6398 start = None 6399 increment = None 6400 order = None 6401 6402 if self._match(TokenType.L_PAREN, advance=False): 6403 args = self._parse_wrapped_csv(self._parse_bitwise) 6404 start = seq_get(args, 0) 6405 increment = seq_get(args, 1) 6406 elif self._match_text_seq("START"): 6407 start = self._parse_bitwise() 6408 self._match_text_seq("INCREMENT") 6409 increment = self._parse_bitwise() 6410 if self._match_text_seq("ORDER"): 6411 order = True 6412 elif self._match_text_seq("NOORDER"): 6413 order = False 6414 6415 if start and increment: 6416 return exp.GeneratedAsIdentityColumnConstraint( 6417 start=start, increment=increment, this=False, 
order=order 6418 ) 6419 6420 return exp.AutoIncrementColumnConstraint() 6421 6422 def _parse_check_constraint(self) -> t.Optional[exp.CheckColumnConstraint]: 6423 if not self._match(TokenType.L_PAREN, advance=False): 6424 return None 6425 6426 return self.expression( 6427 exp.CheckColumnConstraint, 6428 this=self._parse_wrapped(self._parse_assignment), 6429 enforced=self._match_text_seq("ENFORCED"), 6430 ) 6431 6432 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6433 if not self._match_text_seq("REFRESH"): 6434 self._retreat(self._index - 1) 6435 return None 6436 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6437 6438 def _parse_compress(self) -> exp.CompressColumnConstraint: 6439 if self._match(TokenType.L_PAREN, advance=False): 6440 return self.expression( 6441 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6442 ) 6443 6444 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6445 6446 def _parse_generated_as_identity( 6447 self, 6448 ) -> ( 6449 exp.GeneratedAsIdentityColumnConstraint 6450 | exp.ComputedColumnConstraint 6451 | exp.GeneratedAsRowColumnConstraint 6452 ): 6453 if self._match_text_seq("BY", "DEFAULT"): 6454 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6455 this = self.expression( 6456 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6457 ) 6458 else: 6459 self._match_text_seq("ALWAYS") 6460 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6461 6462 self._match(TokenType.ALIAS) 6463 6464 if self._match_text_seq("ROW"): 6465 start = self._match_text_seq("START") 6466 if not start: 6467 self._match(TokenType.END) 6468 hidden = self._match_text_seq("HIDDEN") 6469 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6470 6471 identity = self._match_text_seq("IDENTITY") 6472 6473 if self._match(TokenType.L_PAREN): 6474 if self._match(TokenType.START_WITH): 6475 this.set("start", self._parse_bitwise()) 6476 if self._match_text_seq("INCREMENT", "BY"): 6477 this.set("increment", self._parse_bitwise()) 6478 if self._match_text_seq("MINVALUE"): 6479 this.set("minvalue", self._parse_bitwise()) 6480 if self._match_text_seq("MAXVALUE"): 6481 this.set("maxvalue", self._parse_bitwise()) 6482 6483 if self._match_text_seq("CYCLE"): 6484 this.set("cycle", True) 6485 elif self._match_text_seq("NO", "CYCLE"): 6486 this.set("cycle", False) 6487 6488 if not identity: 6489 this.set("expression", self._parse_range()) 6490 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6491 args = self._parse_csv(self._parse_bitwise) 6492 this.set("start", seq_get(args, 0)) 6493 this.set("increment", seq_get(args, 1)) 6494 6495 self._match_r_paren() 6496 6497 return this 6498 6499 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6500 self._match_text_seq("LENGTH") 6501 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6502 6503 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6504 if self._match_text_seq("NULL"): 6505 return self.expression(exp.NotNullColumnConstraint) 6506 if self._match_text_seq("CASESPECIFIC"): 6507 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6508 if self._match_text_seq("FOR", "REPLICATION"): 6509 return self.expression(exp.NotForReplicationColumnConstraint) 6510 6511 # Unconsume the `NOT` token 6512 self._retreat(self._index - 1) 6513 return None 6514 6515 def 
_parse_column_constraint(self) -> t.Optional[exp.Expression]: 6516 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6517 6518 procedure_option_follows = ( 6519 self._match(TokenType.WITH, advance=False) 6520 and self._next 6521 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6522 ) 6523 6524 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6525 constraint = self.CONSTRAINT_PARSERS[self._prev.text.upper()](self) 6526 if not constraint: 6527 self._retreat(self._index - 1) 6528 return None 6529 6530 return self.expression(exp.ColumnConstraint, this=this, kind=constraint) 6531 6532 return this 6533 6534 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6535 if not self._match(TokenType.CONSTRAINT): 6536 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6537 6538 return self.expression( 6539 exp.Constraint, 6540 this=self._parse_id_var(), 6541 expressions=self._parse_unnamed_constraints(), 6542 ) 6543 6544 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6545 constraints = [] 6546 while True: 6547 constraint = self._parse_unnamed_constraint() or self._parse_function() 6548 if not constraint: 6549 break 6550 constraints.append(constraint) 6551 6552 return constraints 6553 6554 def _parse_unnamed_constraint( 6555 self, constraints: t.Optional[t.Collection[str]] = None 6556 ) -> t.Optional[exp.Expression]: 6557 index = self._index 6558 6559 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6560 constraints or self.CONSTRAINT_PARSERS 6561 ): 6562 return None 6563 6564 constraint = self._prev.text.upper() 6565 if constraint not in self.CONSTRAINT_PARSERS: 6566 self.raise_error(f"No parser found for schema constraint {constraint}.") 6567 6568 constraint = self.CONSTRAINT_PARSERS[constraint](self) 6569 if not constraint: 6570 self._retreat(index) 6571 6572 return constraint 6573 6574 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6575 return self._parse_id_var(any_token=False) 6576 6577 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6578 self._match_texts(("KEY", "INDEX")) 6579 return self.expression( 6580 exp.UniqueColumnConstraint, 6581 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6582 this=self._parse_schema(self._parse_unique_key()), 6583 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6584 on_conflict=self._parse_on_conflict(), 6585 options=self._parse_key_constraint_options(), 6586 ) 6587 6588 def _parse_key_constraint_options(self) -> t.List[str]: 6589 options = [] 6590 while True: 6591 if not self._curr: 6592 break 6593 6594 if self._match(TokenType.ON): 6595 action = None 6596 on = self._advance_any() and self._prev.text 6597 6598 if self._match_text_seq("NO", "ACTION"): 6599 action = "NO ACTION" 6600 elif self._match_text_seq("CASCADE"): 6601 action = "CASCADE" 6602 elif self._match_text_seq("RESTRICT"): 6603 action = "RESTRICT" 6604 elif self._match_pair(TokenType.SET, TokenType.NULL): 6605 action = "SET NULL" 6606 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6607 action = "SET DEFAULT" 6608 else: 6609 self.raise_error("Invalid key constraint") 6610 6611 options.append(f"ON {on} {action}") 6612 else: 6613 var = self._parse_var_from_options( 6614 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6615 ) 6616 if not var: 6617 break 6618 options.append(var.name) 6619 6620 return options 6621 6622 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6623 
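# Illustrative note (not part of the original source): this consumes the
# REFERENCES clause of a column or foreign key constraint, e.g.:
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> ddl = sqlglot.parse_one("CREATE TABLE t (a INT REFERENCES p(id) ON DELETE CASCADE)")
#   >>> ddl.find(exp.Reference) is not None
#   True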
if match and not self._match(TokenType.REFERENCES): 6624 return None 6625 6626 expressions = None 6627 this = self._parse_table(schema=True) 6628 options = self._parse_key_constraint_options() 6629 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6630 6631 def _parse_foreign_key(self) -> exp.ForeignKey: 6632 expressions = ( 6633 self._parse_wrapped_id_vars() 6634 if not self._match(TokenType.REFERENCES, advance=False) 6635 else None 6636 ) 6637 reference = self._parse_references() 6638 on_options = {} 6639 6640 while self._match(TokenType.ON): 6641 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6642 self.raise_error("Expected DELETE or UPDATE") 6643 6644 kind = self._prev.text.lower() 6645 6646 if self._match_text_seq("NO", "ACTION"): 6647 action = "NO ACTION" 6648 elif self._match(TokenType.SET): 6649 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6650 action = "SET " + self._prev.text.upper() 6651 else: 6652 self._advance() 6653 action = self._prev.text.upper() 6654 6655 on_options[kind] = action 6656 6657 return self.expression( 6658 exp.ForeignKey, 6659 expressions=expressions, 6660 reference=reference, 6661 options=self._parse_key_constraint_options(), 6662 **on_options, # type: ignore 6663 ) 6664 6665 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6666 return self._parse_field() 6667 6668 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6669 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6670 self._retreat(self._index - 1) 6671 return None 6672 6673 id_vars = self._parse_wrapped_id_vars() 6674 return self.expression( 6675 exp.PeriodForSystemTimeConstraint, 6676 this=seq_get(id_vars, 0), 6677 expression=seq_get(id_vars, 1), 6678 ) 6679 6680 def _parse_primary_key( 6681 self, wrapped_optional: bool = False, in_props: bool = False 6682 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6683 desc = ( 6684 self._match_set((TokenType.ASC, TokenType.DESC)) 6685 and self._prev.token_type == TokenType.DESC 6686 ) 6687 6688 this = None 6689 if ( 6690 self._curr.text.upper() not in self.CONSTRAINT_PARSERS 6691 and self._next 6692 and self._next.token_type == TokenType.L_PAREN 6693 ): 6694 this = self._parse_id_var() 6695 6696 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6697 return self.expression( 6698 exp.PrimaryKeyColumnConstraint, 6699 desc=desc, 6700 options=self._parse_key_constraint_options(), 6701 ) 6702 6703 expressions = self._parse_wrapped_csv( 6704 self._parse_primary_key_part, optional=wrapped_optional 6705 ) 6706 6707 return self.expression( 6708 exp.PrimaryKey, 6709 this=this, 6710 expressions=expressions, 6711 include=self._parse_index_params(), 6712 options=self._parse_key_constraint_options(), 6713 ) 6714 6715 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6716 return self._parse_slice(self._parse_alias(self._parse_disjunction(), explicit=True)) 6717 6718 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6719 """ 6720 Parses a datetime column in ODBC format. We parse the column into the corresponding 6721 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6722 same as we did for `DATE('yyyy-mm-dd')`. 
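Example (illustrative, not part of the original source): with the default
mapping, `{d '2024-01-15'}` is parsed like `DATE('2024-01-15')`, and
`{ts '2024-01-15 10:00:00'}` like the corresponding timestamp constructor.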
6723 6724 Reference: 6725 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6726 """ 6727 self._match(TokenType.VAR) 6728 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6729 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6730 if not self._match(TokenType.R_BRACE): 6731 self.raise_error("Expected }") 6732 return expression 6733 6734 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6735 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6736 return this 6737 6738 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6739 map_token = seq_get(self._tokens, self._index - 2) 6740 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6741 else: 6742 parse_map = False 6743 6744 bracket_kind = self._prev.token_type 6745 if ( 6746 bracket_kind == TokenType.L_BRACE 6747 and self._curr 6748 and self._curr.token_type == TokenType.VAR 6749 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6750 ): 6751 return self._parse_odbc_datetime_literal() 6752 6753 expressions = self._parse_csv( 6754 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6755 ) 6756 6757 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6758 self.raise_error("Expected ]") 6759 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6760 self.raise_error("Expected }") 6761 6762 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6763 if bracket_kind == TokenType.L_BRACE: 6764 this = self.expression( 6765 exp.Struct, 6766 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6767 ) 6768 elif not this: 6769 this = build_array_constructor( 6770 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6771 ) 6772 else: 6773 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6774 if constructor_type: 6775 return build_array_constructor( 6776 constructor_type, 6777 args=expressions, 6778 bracket_kind=bracket_kind, 6779 dialect=self.dialect, 6780 ) 6781 6782 expressions = apply_index_offset( 6783 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6784 ) 6785 this = self.expression( 6786 exp.Bracket, 6787 this=this, 6788 expressions=expressions, 6789 comments=this.pop_comments(), 6790 ) 6791 6792 self._add_comments(this) 6793 return self._parse_bracket(this) 6794 6795 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6796 if not self._match(TokenType.COLON): 6797 return this 6798 6799 if self._match_pair(TokenType.DASH, TokenType.COLON, advance=False): 6800 self._advance() 6801 end: t.Optional[exp.Expression] = -exp.Literal.number("1") 6802 else: 6803 end = self._parse_assignment() 6804 step = self._parse_unary() if self._match(TokenType.COLON) else None 6805 return self.expression(exp.Slice, this=this, expression=end, step=step) 6806 6807 def _parse_case(self) -> t.Optional[exp.Expression]: 6808 if self._match(TokenType.DOT, advance=False): 6809 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6810 self._retreat(self._index - 1) 6811 return None 6812 6813 ifs = [] 6814 default = None 6815 6816 comments = self._prev_comments 6817 expression = self._parse_disjunction() 6818 6819 while self._match(TokenType.WHEN): 6820 this = self._parse_disjunction() 6821 self._match(TokenType.THEN) 6822 then = self._parse_disjunction() 6823 
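# Illustrative note (not part of the original source): each WHEN/THEN pair
# becomes one exp.If collected into `ifs` below:
#   >>> import sqlglot
#   >>> e = sqlglot.parse_one("CASE WHEN a THEN 1 ELSE 2 END")
#   >>> len(e.args["ifs"])
#   1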
ifs.append(self.expression(exp.If, this=this, true=then)) 6824 6825 if self._match(TokenType.ELSE): 6826 default = self._parse_disjunction() 6827 6828 if not self._match(TokenType.END): 6829 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6830 default = exp.column("interval") 6831 else: 6832 self.raise_error("Expected END after CASE", self._prev) 6833 6834 return self.expression( 6835 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6836 ) 6837 6838 def _parse_if(self) -> t.Optional[exp.Expression]: 6839 if self._match(TokenType.L_PAREN): 6840 args = self._parse_csv( 6841 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6842 ) 6843 this = self.validate_expression(exp.If.from_arg_list(args), args) 6844 self._match_r_paren() 6845 else: 6846 index = self._index - 1 6847 6848 if self.NO_PAREN_IF_COMMANDS and index == 0: 6849 return self._parse_as_command(self._prev) 6850 6851 condition = self._parse_disjunction() 6852 6853 if not condition: 6854 self._retreat(index) 6855 return None 6856 6857 self._match(TokenType.THEN) 6858 true = self._parse_disjunction() 6859 false = self._parse_disjunction() if self._match(TokenType.ELSE) else None 6860 self._match(TokenType.END) 6861 this = self.expression(exp.If, this=condition, true=true, false=false) 6862 6863 return this 6864 6865 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6866 if not self._match_text_seq("VALUE", "FOR"): 6867 self._retreat(self._index - 1) 6868 return None 6869 6870 return self.expression( 6871 exp.NextValueFor, 6872 this=self._parse_column(), 6873 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6874 ) 6875 6876 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6877 this = self._parse_function() or self._parse_var_or_string(upper=True) 6878 6879 if self._match(TokenType.FROM): 6880 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6881 6882 if not self._match(TokenType.COMMA): 6883 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6884 6885 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6886 6887 def _parse_gap_fill(self) -> exp.GapFill: 6888 self._match(TokenType.TABLE) 6889 this = self._parse_table() 6890 6891 self._match(TokenType.COMMA) 6892 args = [this, *self._parse_csv(self._parse_lambda)] 6893 6894 gap_fill = exp.GapFill.from_arg_list(args) 6895 return self.validate_expression(gap_fill, args) 6896 6897 def _parse_char(self) -> exp.Chr: 6898 return self.expression( 6899 exp.Chr, 6900 expressions=self._parse_csv(self._parse_assignment), 6901 charset=self._match(TokenType.USING) and self._parse_var(), 6902 ) 6903 6904 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6905 this = self._parse_disjunction() 6906 6907 if not self._match(TokenType.ALIAS): 6908 if self._match(TokenType.COMMA): 6909 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6910 6911 self.raise_error("Expected AS after CAST") 6912 6913 fmt = None 6914 to = self._parse_types() 6915 6916 default = self._match(TokenType.DEFAULT) 6917 if default: 6918 default = self._parse_bitwise() 6919 self._match_text_seq("ON", "CONVERSION", "ERROR") 6920 6921 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6922 fmt_string = self._parse_string() 6923 fmt = self._parse_at_time_zone(fmt_string) 6924 6925 if not to: 6926 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6927 if to.this in 
exp.DataType.TEMPORAL_TYPES: 6928 this = self.expression( 6929 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6930 this=this, 6931 format=exp.Literal.string( 6932 format_time( 6933 fmt_string.this if fmt_string else "", 6934 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6935 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6936 ) 6937 ), 6938 safe=safe, 6939 ) 6940 6941 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6942 this.set("zone", fmt.args["zone"]) 6943 return this 6944 elif not to: 6945 self.raise_error("Expected TYPE after CAST") 6946 elif isinstance(to, exp.Identifier): 6947 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6948 elif to.this == exp.DataType.Type.CHAR: 6949 if self._match(TokenType.CHARACTER_SET): 6950 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6951 6952 return self.build_cast( 6953 strict=strict, 6954 this=this, 6955 to=to, 6956 format=fmt, 6957 safe=safe, 6958 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6959 default=default, 6960 ) 6961 6962 def _parse_string_agg(self) -> exp.GroupConcat: 6963 if self._match(TokenType.DISTINCT): 6964 args: t.List[t.Optional[exp.Expression]] = [ 6965 self.expression(exp.Distinct, expressions=[self._parse_disjunction()]) 6966 ] 6967 if self._match(TokenType.COMMA): 6968 args.extend(self._parse_csv(self._parse_disjunction)) 6969 else: 6970 args = self._parse_csv(self._parse_disjunction) # type: ignore 6971 6972 if self._match_text_seq("ON", "OVERFLOW"): 6973 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6974 if self._match_text_seq("ERROR"): 6975 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6976 else: 6977 self._match_text_seq("TRUNCATE") 6978 on_overflow = self.expression( 6979 exp.OverflowTruncateBehavior, 6980 this=self._parse_string(), 6981 with_count=( 6982 self._match_text_seq("WITH", "COUNT") 6983 or not self._match_text_seq("WITHOUT", "COUNT") 6984 ), 6985 ) 6986 else: 6987 on_overflow = None 6988 6989 index = self._index 6990 if not self._match(TokenType.R_PAREN) and args: 6991 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6992 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6993 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6994 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6995 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6996 6997 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6998 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6999 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
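# e.g. LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) is parsed below into GroupConcat(this=Order(this=x, ...), separator=','), the same shape that GROUP_CONCAT(x ORDER BY y SEPARATOR ',') produces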
7000 if not self._match_text_seq("WITHIN", "GROUP"): 7001 self._retreat(index) 7002 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 7003 7004 # The corresponding match_r_paren will be called in parse_function (caller) 7005 self._match_l_paren() 7006 7007 return self.expression( 7008 exp.GroupConcat, 7009 this=self._parse_order(this=seq_get(args, 0)), 7010 separator=seq_get(args, 1), 7011 on_overflow=on_overflow, 7012 ) 7013 7014 def _parse_convert( 7015 self, strict: bool, safe: t.Optional[bool] = None 7016 ) -> t.Optional[exp.Expression]: 7017 this = self._parse_bitwise() 7018 7019 if self._match(TokenType.USING): 7020 to: t.Optional[exp.Expression] = self.expression( 7021 exp.CharacterSet, this=self._parse_var(tokens={TokenType.BINARY}) 7022 ) 7023 elif self._match(TokenType.COMMA): 7024 to = self._parse_types() 7025 else: 7026 to = None 7027 7028 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 7029 7030 def _parse_xml_element(self) -> exp.XMLElement: 7031 if self._match_text_seq("EVALNAME"): 7032 evalname = True 7033 this = self._parse_bitwise() 7034 else: 7035 evalname = None 7036 self._match_text_seq("NAME") 7037 this = self._parse_id_var() 7038 7039 return self.expression( 7040 exp.XMLElement, 7041 this=this, 7042 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_bitwise), 7043 evalname=evalname, 7044 ) 7045 7046 def _parse_xml_table(self) -> exp.XMLTable: 7047 namespaces = None 7048 passing = None 7049 columns = None 7050 7051 if self._match_text_seq("XMLNAMESPACES", "("): 7052 namespaces = self._parse_xml_namespace() 7053 self._match_text_seq(")", ",") 7054 7055 this = self._parse_string() 7056 7057 if self._match_text_seq("PASSING"): 7058 # The BY VALUE keywords are optional and are provided for semantic clarity 7059 self._match_text_seq("BY", "VALUE") 7060 passing = self._parse_csv(self._parse_column) 7061 7062 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 7063 7064 if self._match_text_seq("COLUMNS"): 7065 columns = self._parse_csv(self._parse_field_def) 7066 7067 return self.expression( 7068 exp.XMLTable, 7069 this=this, 7070 namespaces=namespaces, 7071 passing=passing, 7072 columns=columns, 7073 by_ref=by_ref, 7074 ) 7075 7076 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 7077 namespaces = [] 7078 7079 while True: 7080 if self._match(TokenType.DEFAULT): 7081 uri = self._parse_string() 7082 else: 7083 uri = self._parse_alias(self._parse_string()) 7084 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 7085 if not self._match(TokenType.COMMA): 7086 break 7087 7088 return namespaces 7089 7090 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 7091 args = self._parse_csv(self._parse_disjunction) 7092 7093 if len(args) < 3: 7094 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 7095 7096 return self.expression(exp.DecodeCase, expressions=args) 7097 7098 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 7099 self._match_text_seq("KEY") 7100 key = self._parse_column() 7101 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 7102 self._match_text_seq("VALUE") 7103 value = self._parse_bitwise() 7104 7105 if not key and not value: 7106 return None 7107 return self.expression(exp.JSONKeyValue, this=key, expression=value) 7108 7109 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7110 if not this or not self._match_text_seq("FORMAT", "JSON"): 7111 return this 7112 
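# The FORMAT JSON suffix was consumed, so wrap whatever was parsed before it (a string, column, key/value pair, etc.) to mark it as JSON-formatted input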
7113 return self.expression(exp.FormatJson, this=this) 7114 7115 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 7116 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 7117 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 7118 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 7119 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 7120 else: 7121 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 7122 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 7123 7124 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 7125 7126 if not empty and not error and not null: 7127 return None 7128 7129 return self.expression( 7130 exp.OnCondition, 7131 empty=empty, 7132 error=error, 7133 null=null, 7134 ) 7135 7136 def _parse_on_handling( 7137 self, on: str, *values: str 7138 ) -> t.Optional[str] | t.Optional[exp.Expression]: 7139 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 7140 for value in values: 7141 if self._match_text_seq(value, "ON", on): 7142 return f"{value} ON {on}" 7143 7144 index = self._index 7145 if self._match(TokenType.DEFAULT): 7146 default_value = self._parse_bitwise() 7147 if self._match_text_seq("ON", on): 7148 return default_value 7149 7150 self._retreat(index) 7151 7152 return None 7153 7154 @t.overload 7155 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 7156 7157 @t.overload 7158 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 7159 7160 def _parse_json_object(self, agg=False): 7161 star = self._parse_star() 7162 expressions = ( 7163 [star] 7164 if star 7165 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 7166 ) 7167 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 7168 7169 unique_keys = None 7170 if self._match_text_seq("WITH", "UNIQUE"): 7171 unique_keys = True 7172 elif self._match_text_seq("WITHOUT", "UNIQUE"): 7173 unique_keys = False 7174 7175 self._match_text_seq("KEYS") 7176 7177 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 7178 self._parse_type() 7179 ) 7180 encoding = self._match_text_seq("ENCODING") and self._parse_var() 7181 7182 return self.expression( 7183 exp.JSONObjectAgg if agg else exp.JSONObject, 7184 expressions=expressions, 7185 null_handling=null_handling, 7186 unique_keys=unique_keys, 7187 return_type=return_type, 7188 encoding=encoding, 7189 ) 7190 7191 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 7192 def _parse_json_column_def(self) -> exp.JSONColumnDef: 7193 if not self._match_text_seq("NESTED"): 7194 this = self._parse_id_var() 7195 ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY) 7196 kind = self._parse_types(allow_identifiers=False) 7197 nested = None 7198 else: 7199 this = None 7200 ordinality = None 7201 kind = None 7202 nested = True 7203 7204 path = self._match_text_seq("PATH") and self._parse_string() 7205 nested_schema = nested and self._parse_json_schema() 7206 7207 return self.expression( 7208 exp.JSONColumnDef, 7209 this=this, 7210 kind=kind, 7211 path=path, 7212 nested_schema=nested_schema, 7213 ordinality=ordinality, 7214 ) 7215 7216 def _parse_json_schema(self) -> exp.JSONSchema: 7217 self._match_text_seq("COLUMNS") 7218 return self.expression( 7219 exp.JSONSchema, 7220 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 7221
) 7222 7223 def _parse_json_table(self) -> exp.JSONTable: 7224 this = self._parse_format_json(self._parse_bitwise()) 7225 path = self._match(TokenType.COMMA) and self._parse_string() 7226 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 7227 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 7228 schema = self._parse_json_schema() 7229 7230 return exp.JSONTable( 7231 this=this, 7232 schema=schema, 7233 path=path, 7234 error_handling=error_handling, 7235 empty_handling=empty_handling, 7236 ) 7237 7238 def _parse_match_against(self) -> exp.MatchAgainst: 7239 if self._match_text_seq("TABLE"): 7240 # parse SingleStore MATCH(TABLE ...) syntax 7241 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 7242 expressions = [] 7243 table = self._parse_table() 7244 if table: 7245 expressions = [table] 7246 else: 7247 expressions = self._parse_csv(self._parse_column) 7248 7249 self._match_text_seq(")", "AGAINST", "(") 7250 7251 this = self._parse_string() 7252 7253 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 7254 modifier = "IN NATURAL LANGUAGE MODE" 7255 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 7256 modifier = f"{modifier} WITH QUERY EXPANSION" 7257 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 7258 modifier = "IN BOOLEAN MODE" 7259 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 7260 modifier = "WITH QUERY EXPANSION" 7261 else: 7262 modifier = None 7263 7264 return self.expression( 7265 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 7266 ) 7267 7268 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 7269 def _parse_open_json(self) -> exp.OpenJSON: 7270 this = self._parse_bitwise() 7271 path = self._match(TokenType.COMMA) and self._parse_string() 7272 7273 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 7274 this = self._parse_field(any_token=True) 7275 kind = self._parse_types() 7276 path = self._parse_string() 7277 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 7278 7279 return self.expression( 7280 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 7281 ) 7282 7283 expressions = None 7284 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7285 self._match_l_paren() 7286 expressions = self._parse_csv(_parse_open_json_column_def) 7287 7288 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 7289 7290 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7291 args = self._parse_csv(self._parse_bitwise) 7292 7293 if self._match(TokenType.IN): 7294 return self.expression( 7295 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7296 ) 7297 7298 if haystack_first: 7299 haystack = seq_get(args, 0) 7300 needle = seq_get(args, 1) 7301 else: 7302 haystack = seq_get(args, 1) 7303 needle = seq_get(args, 0) 7304 7305 return self.expression( 7306 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7307 ) 7308 7309 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7310 args = self._parse_csv(self._parse_table) 7311 return exp.JoinHint(this=func_name.upper(), expressions=args) 7312 7313 def _parse_substring(self) -> exp.Substring: 7314 # Postgres supports the form: substring(string [from int] [for int]) 7315 # (despite being undocumented, the reverse order also works) 7316 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7317 7318 args = 
t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7319 7320 start, length = None, None 7321 7322 while self._curr: 7323 if self._match(TokenType.FROM): 7324 start = self._parse_bitwise() 7325 elif self._match(TokenType.FOR): 7326 if not start: 7327 start = exp.Literal.number(1) 7328 length = self._parse_bitwise() 7329 else: 7330 break 7331 7332 if start: 7333 args.append(start) 7334 if length: 7335 args.append(length) 7336 7337 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7338 7339 def _parse_trim(self) -> exp.Trim: 7340 # https://www.w3resource.com/sql/character-functions/trim.php 7341 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7342 7343 position = None 7344 collation = None 7345 expression = None 7346 7347 if self._match_texts(self.TRIM_TYPES): 7348 position = self._prev.text.upper() 7349 7350 this = self._parse_bitwise() 7351 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7352 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7353 expression = self._parse_bitwise() 7354 7355 if invert_order: 7356 this, expression = expression, this 7357 7358 if self._match(TokenType.COLLATE): 7359 collation = self._parse_bitwise() 7360 7361 return self.expression( 7362 exp.Trim, this=this, position=position, expression=expression, collation=collation 7363 ) 7364 7365 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7366 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7367 7368 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7369 return self._parse_window(self._parse_id_var(), alias=True) 7370 7371 def _parse_respect_or_ignore_nulls( 7372 self, this: t.Optional[exp.Expression] 7373 ) -> t.Optional[exp.Expression]: 7374 if self._match_text_seq("IGNORE", "NULLS"): 7375 return self.expression(exp.IgnoreNulls, this=this) 7376 if self._match_text_seq("RESPECT", "NULLS"): 7377 return self.expression(exp.RespectNulls, this=this) 7378 return this 7379 7380 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7381 if self._match(TokenType.HAVING): 7382 self._match_texts(("MAX", "MIN")) 7383 max = self._prev.text.upper() != "MIN" 7384 return self.expression( 7385 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7386 ) 7387 7388 return this 7389 7390 def _parse_window( 7391 self, this: t.Optional[exp.Expression], alias: bool = False 7392 ) -> t.Optional[exp.Expression]: 7393 func = this 7394 comments = func.comments if isinstance(func, exp.Expression) else None 7395 7396 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7397 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7398 if self._match_text_seq("WITHIN", "GROUP"): 7399 order = self._parse_wrapped(self._parse_order) 7400 this = self.expression(exp.WithinGroup, this=this, expression=order) 7401 7402 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7403 self._match(TokenType.WHERE) 7404 this = self.expression( 7405 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7406 ) 7407 self._match_r_paren() 7408 7409 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7410 # Some dialects choose to implement and some do not. 
7411 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7412 7413 # There is some code above in _parse_lambda that handles 7414 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7415 7416 # The below changes handle 7417 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 7418 7419 # Oracle allows both formats 7420 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7421 # and Snowflake chose to do the same for familiarity 7422 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7423 if isinstance(this, exp.AggFunc): 7424 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7425 7426 if ignore_respect and ignore_respect is not this: 7427 ignore_respect.replace(ignore_respect.this) 7428 this = self.expression(ignore_respect.__class__, this=this) 7429 7430 this = self._parse_respect_or_ignore_nulls(this) 7431 7432 # bigquery select from window x AS (partition by ...) 7433 if alias: 7434 over = None 7435 self._match(TokenType.ALIAS) 7436 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7437 return this 7438 else: 7439 over = self._prev.text.upper() 7440 7441 if comments and isinstance(func, exp.Expression): 7442 func.pop_comments() 7443 7444 if not self._match(TokenType.L_PAREN): 7445 return self.expression( 7446 exp.Window, 7447 comments=comments, 7448 this=this, 7449 alias=self._parse_id_var(False), 7450 over=over, 7451 ) 7452 7453 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7454 7455 first = self._match(TokenType.FIRST) 7456 if self._match_text_seq("LAST"): 7457 first = False 7458 7459 partition, order = self._parse_partition_and_order() 7460 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7461 7462 if kind: 7463 self._match(TokenType.BETWEEN) 7464 start = self._parse_window_spec() 7465 7466 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7467 exclude = ( 7468 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7469 if self._match_text_seq("EXCLUDE") 7470 else None 7471 ) 7472 7473 spec = self.expression( 7474 exp.WindowSpec, 7475 kind=kind, 7476 start=start["value"], 7477 start_side=start["side"], 7478 end=end.get("value"), 7479 end_side=end.get("side"), 7480 exclude=exclude, 7481 ) 7482 else: 7483 spec = None 7484 7485 self._match_r_paren() 7486 7487 window = self.expression( 7488 exp.Window, 7489 comments=comments, 7490 this=this, 7491 partition_by=partition, 7492 order=order, 7493 spec=spec, 7494 alias=window_alias, 7495 over=over, 7496 first=first, 7497 ) 7498 7499 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
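# e.g. MAX(x) KEEP (DENSE_RANK FIRST ORDER BY y) OVER (PARTITION BY z): the KEEP window was just built, so if an OVER token follows we recurse and let the outer window wrap it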
7500 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7501 return self._parse_window(window, alias=alias) 7502 7503 return window 7504 7505 def _parse_partition_and_order( 7506 self, 7507 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7508 return self._parse_partition_by(), self._parse_order() 7509 7510 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7511 self._match(TokenType.BETWEEN) 7512 7513 return { 7514 "value": ( 7515 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7516 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7517 or self._parse_bitwise() 7518 ), 7519 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7520 } 7521 7522 def _parse_alias( 7523 self, this: t.Optional[exp.Expression], explicit: bool = False 7524 ) -> t.Optional[exp.Expression]: 7525 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7526 # so this section tries to parse the clause version and if it fails, it treats the token 7527 # as an identifier (alias) 7528 if self._can_parse_limit_or_offset(): 7529 return this 7530 7531 any_token = self._match(TokenType.ALIAS) 7532 comments = self._prev_comments or [] 7533 7534 if explicit and not any_token: 7535 return this 7536 7537 if self._match(TokenType.L_PAREN): 7538 aliases = self.expression( 7539 exp.Aliases, 7540 comments=comments, 7541 this=this, 7542 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7543 ) 7544 self._match_r_paren(aliases) 7545 return aliases 7546 7547 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7548 self.STRING_ALIASES and self._parse_string_as_identifier() 7549 ) 7550 7551 if alias: 7552 comments.extend(alias.pop_comments()) 7553 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7554 column = this.this 7555 7556 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7557 if not this.comments and column and column.comments: 7558 this.comments = column.pop_comments() 7559 7560 return this 7561 7562 def _parse_id_var( 7563 self, 7564 any_token: bool = True, 7565 tokens: t.Optional[t.Collection[TokenType]] = None, 7566 ) -> t.Optional[exp.Expression]: 7567 expression = self._parse_identifier() 7568 if not expression and ( 7569 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7570 ): 7571 quoted = self._prev.token_type == TokenType.STRING 7572 expression = self._identifier_expression(quoted=quoted) 7573 7574 return expression 7575 7576 def _parse_string(self) -> t.Optional[exp.Expression]: 7577 if self._match_set(self.STRING_PARSERS): 7578 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7579 return self._parse_placeholder() 7580 7581 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7582 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7583 if output: 7584 output.update_positions(self._prev) 7585 return output 7586 7587 def _parse_number(self) -> t.Optional[exp.Expression]: 7588 if self._match_set(self.NUMERIC_PARSERS): 7589 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7590 return self._parse_placeholder() 7591 7592 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7593 if self._match(TokenType.IDENTIFIER): 7594 return self._identifier_expression(quoted=True) 7595 return self._parse_placeholder() 7596 7597 def _parse_var( 7598 self, 7599 any_token: bool = False, 7600 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7601 upper: bool = False, 7602 ) -> t.Optional[exp.Expression]: 7603 if ( 7604 (any_token and self._advance_any()) 7605 or self._match(TokenType.VAR) 7606 or (self._match_set(tokens) if tokens else False) 7607 ): 7608 return self.expression( 7609 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7610 ) 7611 return self._parse_placeholder() 7612 7613 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7614 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7615 self._advance() 7616 return self._prev 7617 return None 7618 7619 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7620 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7621 7622 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7623 return self._parse_primary() or self._parse_var(any_token=True) 7624 7625 def _parse_null(self) -> t.Optional[exp.Expression]: 7626 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7627 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7628 return self._parse_placeholder() 7629 7630 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7631 if self._match(TokenType.TRUE): 7632 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7633 if self._match(TokenType.FALSE): 7634 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7635 return self._parse_placeholder() 7636 7637 def _parse_star(self) -> t.Optional[exp.Expression]: 7638 if self._match(TokenType.STAR): 7639 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7640 return self._parse_placeholder() 7641 7642 def _parse_parameter(self) -> exp.Parameter: 7643 this = self._parse_identifier() or self._parse_primary_or_var() 7644 return self.expression(exp.Parameter, this=this) 7645 7646 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7647 if self._match_set(self.PLACEHOLDER_PARSERS): 7648 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7649 if placeholder: 7650 return placeholder 7651 self._advance(-1) 7652 return None 7653 7654 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7655 if not self._match_texts(keywords): 7656 return None 7657 if self._match(TokenType.L_PAREN, advance=False): 7658 return self._parse_wrapped_csv(self._parse_expression) 7659 7660 expression = self._parse_alias(self._parse_disjunction(), explicit=True) 7661 return [expression] if expression else None 7662 7663 def _parse_csv( 7664 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7665 ) -> t.List[exp.Expression]: 7666 parse_result = parse_method() 7667 items = [parse_result] if parse_result is not None else [] 7668 7669 while self._match(sep): 7670 self._add_comments(parse_result) 7671 parse_result = parse_method() 7672 if parse_result is not None: 7673 items.append(parse_result) 7674 7675 return items 7676 7677 def _parse_tokens( 7678 self, parse_method: t.Callable, expressions: t.Dict 7679 ) -> t.Optional[exp.Expression]: 7680 this = parse_method() 7681 7682 while self._match_set(expressions): 7683 this = self.expression( 7684 expressions[self._prev.token_type], 7685 this=this, 7686 comments=self._prev_comments, 7687 expression=parse_method(), 7688 ) 7689 7690 return this 7691 7692 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7693 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7694 7695 def 
_parse_wrapped_csv( 7696 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7697 ) -> t.List[exp.Expression]: 7698 return self._parse_wrapped( 7699 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7700 ) 7701 7702 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7703 wrapped = self._match(TokenType.L_PAREN) 7704 if not wrapped and not optional: 7705 self.raise_error("Expecting (") 7706 parse_result = parse_method() 7707 if wrapped: 7708 self._match_r_paren() 7709 return parse_result 7710 7711 def _parse_expressions(self) -> t.List[exp.Expression]: 7712 return self._parse_csv(self._parse_expression) 7713 7714 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7715 return ( 7716 self._parse_set_operations( 7717 self._parse_alias(self._parse_assignment(), explicit=True) 7718 if alias 7719 else self._parse_assignment() 7720 ) 7721 or self._parse_select() 7722 ) 7723 7724 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7725 return self._parse_query_modifiers( 7726 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7727 ) 7728 7729 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7730 this = None 7731 if self._match_texts(self.TRANSACTION_KIND): 7732 this = self._prev.text 7733 7734 self._match_texts(("TRANSACTION", "WORK")) 7735 7736 modes = [] 7737 while True: 7738 mode = [] 7739 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7740 mode.append(self._prev.text) 7741 7742 if mode: 7743 modes.append(" ".join(mode)) 7744 if not self._match(TokenType.COMMA): 7745 break 7746 7747 return self.expression(exp.Transaction, this=this, modes=modes) 7748 7749 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7750 chain = None 7751 savepoint = None 7752 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7753 7754 self._match_texts(("TRANSACTION", "WORK")) 7755 7756 if self._match_text_seq("TO"): 7757 self._match_text_seq("SAVEPOINT") 7758 savepoint = self._parse_id_var() 7759 7760 if self._match(TokenType.AND): 7761 chain = not self._match_text_seq("NO") 7762 self._match_text_seq("CHAIN") 7763 7764 if is_rollback: 7765 return self.expression(exp.Rollback, savepoint=savepoint) 7766 7767 return self.expression(exp.Commit, chain=chain) 7768 7769 def _parse_refresh(self) -> exp.Refresh | exp.Command: 7770 if self._match(TokenType.TABLE): 7771 kind = "TABLE" 7772 elif self._match_text_seq("MATERIALIZED", "VIEW"): 7773 kind = "MATERIALIZED VIEW" 7774 else: 7775 kind = "" 7776 7777 this = self._parse_string() or self._parse_table() 7778 if not kind and not isinstance(this, exp.Literal): 7779 return self._parse_as_command(self._prev) 7780 7781 return self.expression(exp.Refresh, this=this, kind=kind) 7782 7783 def _parse_column_def_with_exists(self): 7784 start = self._index 7785 self._match(TokenType.COLUMN) 7786 7787 exists_column = self._parse_exists(not_=True) 7788 expression = self._parse_field_def() 7789 7790 if not isinstance(expression, exp.ColumnDef): 7791 self._retreat(start) 7792 return None 7793 7794 expression.set("exists", exists_column) 7795 7796 return expression 7797 7798 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7799 if not self._prev.text.upper() == "ADD": 7800 return None 7801 7802 expression = self._parse_column_def_with_exists() 7803 if not expression: 7804 return None 7805 7806 # 
https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7807 if self._match_texts(("FIRST", "AFTER")): 7808 position = self._prev.text 7809 column_position = self.expression( 7810 exp.ColumnPosition, this=self._parse_column(), position=position 7811 ) 7812 expression.set("position", column_position) 7813 7814 return expression 7815 7816 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7817 drop = self._match(TokenType.DROP) and self._parse_drop() 7818 if drop and not isinstance(drop, exp.Command): 7819 drop.set("kind", drop.args.get("kind", "COLUMN")) 7820 return drop 7821 7822 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7823 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7824 return self.expression( 7825 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7826 ) 7827 7828 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7829 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7830 self._match_text_seq("ADD") 7831 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7832 return self.expression( 7833 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7834 ) 7835 7836 column_def = self._parse_add_column() 7837 if isinstance(column_def, exp.ColumnDef): 7838 return column_def 7839 7840 exists = self._parse_exists(not_=True) 7841 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7842 return self.expression( 7843 exp.AddPartition, 7844 exists=exists, 7845 this=self._parse_field(any_token=True), 7846 location=self._match_text_seq("LOCATION", advance=False) 7847 and self._parse_property(), 7848 ) 7849 7850 return None 7851 7852 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7853 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7854 or self._match_text_seq("COLUMNS") 7855 ): 7856 schema = self._parse_schema() 7857 7858 return ( 7859 ensure_list(schema) 7860 if schema 7861 else self._parse_csv(self._parse_column_def_with_exists) 7862 ) 7863 7864 return self._parse_csv(_parse_add_alteration) 7865 7866 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7867 if self._match_texts(self.ALTER_ALTER_PARSERS): 7868 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7869 7870 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7871 # keyword after ALTER we default to parsing this statement 7872 self._match(TokenType.COLUMN) 7873 column = self._parse_field(any_token=True) 7874 7875 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7876 return self.expression(exp.AlterColumn, this=column, drop=True) 7877 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7878 return self.expression(exp.AlterColumn, this=column, default=self._parse_disjunction()) 7879 if self._match(TokenType.COMMENT): 7880 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7881 if self._match_text_seq("DROP", "NOT", "NULL"): 7882 return self.expression( 7883 exp.AlterColumn, 7884 this=column, 7885 drop=True, 7886 allow_null=True, 7887 ) 7888 if self._match_text_seq("SET", "NOT", "NULL"): 7889 return self.expression( 7890 exp.AlterColumn, 7891 this=column, 7892 allow_null=False, 7893 ) 7894 7895 if self._match_text_seq("SET", "VISIBLE"): 7896 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7897 if self._match_text_seq("SET", "INVISIBLE"): 7898 return 
self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7899 7900 self._match_text_seq("SET", "DATA") 7901 self._match_text_seq("TYPE") 7902 return self.expression( 7903 exp.AlterColumn, 7904 this=column, 7905 dtype=self._parse_types(), 7906 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7907 using=self._match(TokenType.USING) and self._parse_disjunction(), 7908 ) 7909 7910 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7911 if self._match_texts(("ALL", "EVEN", "AUTO")): 7912 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7913 7914 self._match_text_seq("KEY", "DISTKEY") 7915 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7916 7917 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7918 if compound: 7919 self._match_text_seq("SORTKEY") 7920 7921 if self._match(TokenType.L_PAREN, advance=False): 7922 return self.expression( 7923 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7924 ) 7925 7926 self._match_texts(("AUTO", "NONE")) 7927 return self.expression( 7928 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7929 ) 7930 7931 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7932 index = self._index - 1 7933 7934 partition_exists = self._parse_exists() 7935 if self._match(TokenType.PARTITION, advance=False): 7936 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7937 7938 self._retreat(index) 7939 return self._parse_csv(self._parse_drop_column) 7940 7941 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7942 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7943 exists = self._parse_exists() 7944 old_column = self._parse_column() 7945 to = self._match_text_seq("TO") 7946 new_column = self._parse_column() 7947 7948 if old_column is None or to is None or new_column is None: 7949 return None 7950 7951 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7952 7953 self._match_text_seq("TO") 7954 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7955 7956 def _parse_alter_table_set(self) -> exp.AlterSet: 7957 alter_set = self.expression(exp.AlterSet) 7958 7959 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7960 "TABLE", "PROPERTIES" 7961 ): 7962 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7963 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7964 alter_set.set("expressions", [self._parse_assignment()]) 7965 elif self._match_texts(("LOGGED", "UNLOGGED")): 7966 alter_set.set("option", exp.var(self._prev.text.upper())) 7967 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7968 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7969 elif self._match_text_seq("LOCATION"): 7970 alter_set.set("location", self._parse_field()) 7971 elif self._match_text_seq("ACCESS", "METHOD"): 7972 alter_set.set("access_method", self._parse_field()) 7973 elif self._match_text_seq("TABLESPACE"): 7974 alter_set.set("tablespace", self._parse_field()) 7975 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7976 alter_set.set("file_format", [self._parse_field()]) 7977 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7978 alter_set.set("file_format", self._parse_wrapped_options()) 7979 elif 
self._match_text_seq("STAGE_COPY_OPTIONS"): 7980 alter_set.set("copy_options", self._parse_wrapped_options()) 7981 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7982 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7983 else: 7984 if self._match_text_seq("SERDE"): 7985 alter_set.set("serde", self._parse_field()) 7986 7987 properties = self._parse_wrapped(self._parse_properties, optional=True) 7988 alter_set.set("expressions", [properties]) 7989 7990 return alter_set 7991 7992 def _parse_alter_session(self) -> exp.AlterSession: 7993 """Parse ALTER SESSION SET/UNSET statements.""" 7994 if self._match(TokenType.SET): 7995 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7996 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7997 7998 self._match_text_seq("UNSET") 7999 expressions = self._parse_csv( 8000 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 8001 ) 8002 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 8003 8004 def _parse_alter(self) -> exp.Alter | exp.Command: 8005 start = self._prev 8006 8007 alter_token = self._match_set(self.ALTERABLES) and self._prev 8008 if not alter_token: 8009 return self._parse_as_command(start) 8010 8011 exists = self._parse_exists() 8012 only = self._match_text_seq("ONLY") 8013 8014 if alter_token.token_type == TokenType.SESSION: 8015 this = None 8016 check = None 8017 cluster = None 8018 else: 8019 this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS) 8020 check = self._match_text_seq("WITH", "CHECK") 8021 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8022 8023 if self._next: 8024 self._advance() 8025 8026 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 8027 if parser: 8028 actions = ensure_list(parser(self)) 8029 not_valid = self._match_text_seq("NOT", "VALID") 8030 options = self._parse_csv(self._parse_property) 8031 cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE") 8032 8033 if not self._curr and actions: 8034 return self.expression( 8035 exp.Alter, 8036 this=this, 8037 kind=alter_token.text.upper(), 8038 exists=exists, 8039 actions=actions, 8040 only=only, 8041 options=options, 8042 cluster=cluster, 8043 not_valid=not_valid, 8044 check=check, 8045 cascade=cascade, 8046 ) 8047 8048 return self._parse_as_command(start) 8049 8050 def _parse_analyze(self) -> exp.Analyze | exp.Command: 8051 start = self._prev 8052 # https://duckdb.org/docs/sql/statements/analyze 8053 if not self._curr: 8054 return self.expression(exp.Analyze) 8055 8056 options = [] 8057 while self._match_texts(self.ANALYZE_STYLES): 8058 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 8059 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 8060 else: 8061 options.append(self._prev.text.upper()) 8062 8063 this: t.Optional[exp.Expression] = None 8064 inner_expression: t.Optional[exp.Expression] = None 8065 8066 kind = self._curr and self._curr.text.upper() 8067 8068 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 8069 this = self._parse_table_parts() 8070 elif self._match_text_seq("TABLES"): 8071 if self._match_set((TokenType.FROM, TokenType.IN)): 8072 kind = f"{kind} {self._prev.text.upper()}" 8073 this = self._parse_table(schema=True, is_db_reference=True) 8074 elif self._match_text_seq("DATABASE"): 8075 this = self._parse_table(schema=True, is_db_reference=True) 8076 elif 
self._match_text_seq("CLUSTER"): 8077 this = self._parse_table() 8078 # Try matching inner expr keywords before fallback to parse table. 8079 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 8080 kind = None 8081 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 8082 else: 8083 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 8084 kind = None 8085 this = self._parse_table_parts() 8086 8087 partition = self._try_parse(self._parse_partition) 8088 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 8089 return self._parse_as_command(start) 8090 8091 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 8092 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 8093 "WITH", "ASYNC", "MODE" 8094 ): 8095 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 8096 else: 8097 mode = None 8098 8099 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 8100 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 8101 8102 properties = self._parse_properties() 8103 return self.expression( 8104 exp.Analyze, 8105 kind=kind, 8106 this=this, 8107 mode=mode, 8108 partition=partition, 8109 properties=properties, 8110 expression=inner_expression, 8111 options=options, 8112 ) 8113 8114 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 8115 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 8116 this = None 8117 kind = self._prev.text.upper() 8118 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 8119 expressions = [] 8120 8121 if not self._match_text_seq("STATISTICS"): 8122 self.raise_error("Expecting token STATISTICS") 8123 8124 if self._match_text_seq("NOSCAN"): 8125 this = "NOSCAN" 8126 elif self._match(TokenType.FOR): 8127 if self._match_text_seq("ALL", "COLUMNS"): 8128 this = "FOR ALL COLUMNS" 8129 if self._match_texts("COLUMNS"): 8130 this = "FOR COLUMNS" 8131 expressions = self._parse_csv(self._parse_column_reference) 8132 elif self._match_text_seq("SAMPLE"): 8133 sample = self._parse_number() 8134 expressions = [ 8135 self.expression( 8136 exp.AnalyzeSample, 8137 sample=sample, 8138 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 8139 ) 8140 ] 8141 8142 return self.expression( 8143 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 8144 ) 8145 8146 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 8147 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 8148 kind = None 8149 this = None 8150 expression: t.Optional[exp.Expression] = None 8151 if self._match_text_seq("REF", "UPDATE"): 8152 kind = "REF" 8153 this = "UPDATE" 8154 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 8155 this = "UPDATE SET DANGLING TO NULL" 8156 elif self._match_text_seq("STRUCTURE"): 8157 kind = "STRUCTURE" 8158 if self._match_text_seq("CASCADE", "FAST"): 8159 this = "CASCADE FAST" 8160 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 8161 ("ONLINE", "OFFLINE") 8162 ): 8163 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 8164 expression = self._parse_into() 8165 8166 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 8167 8168 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 8169 this = self._prev.text.upper() 8170 if self._match_text_seq("COLUMNS"): 8171 return self.expression(exp.AnalyzeColumns, this=f"{this} 
{self._prev.text.upper()}") 8172 return None 8173 8174 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 8175 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 8176 if self._match_text_seq("STATISTICS"): 8177 return self.expression(exp.AnalyzeDelete, kind=kind) 8178 return None 8179 8180 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 8181 if self._match_text_seq("CHAINED", "ROWS"): 8182 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 8183 return None 8184 8185 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 8186 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 8187 this = self._prev.text.upper() 8188 expression: t.Optional[exp.Expression] = None 8189 expressions = [] 8190 update_options = None 8191 8192 if self._match_text_seq("HISTOGRAM", "ON"): 8193 expressions = self._parse_csv(self._parse_column_reference) 8194 with_expressions = [] 8195 while self._match(TokenType.WITH): 8196 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 8197 if self._match_texts(("SYNC", "ASYNC")): 8198 if self._match_text_seq("MODE", advance=False): 8199 with_expressions.append(f"{self._prev.text.upper()} MODE") 8200 self._advance() 8201 else: 8202 buckets = self._parse_number() 8203 if self._match_text_seq("BUCKETS"): 8204 with_expressions.append(f"{buckets} BUCKETS") 8205 if with_expressions: 8206 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 8207 8208 if self._match_texts(("MANUAL", "AUTO")) and self._match( 8209 TokenType.UPDATE, advance=False 8210 ): 8211 update_options = self._prev.text.upper() 8212 self._advance() 8213 elif self._match_text_seq("USING", "DATA"): 8214 expression = self.expression(exp.UsingData, this=self._parse_string()) 8215 8216 return self.expression( 8217 exp.AnalyzeHistogram, 8218 this=this, 8219 expressions=expressions, 8220 expression=expression, 8221 update_options=update_options, 8222 ) 8223 8224 def _parse_merge(self) -> exp.Merge: 8225 self._match(TokenType.INTO) 8226 target = self._parse_table() 8227 8228 if target and self._match(TokenType.ALIAS, advance=False): 8229 target.set("alias", self._parse_table_alias()) 8230 8231 self._match(TokenType.USING) 8232 using = self._parse_table() 8233 8234 return self.expression( 8235 exp.Merge, 8236 this=target, 8237 using=using, 8238 on=self._match(TokenType.ON) and self._parse_disjunction(), 8239 using_cond=self._match(TokenType.USING) and self._parse_using_identifiers(), 8240 whens=self._parse_when_matched(), 8241 returning=self._parse_returning(), 8242 ) 8243 8244 def _parse_when_matched(self) -> exp.Whens: 8245 whens = [] 8246 8247 while self._match(TokenType.WHEN): 8248 matched = not self._match(TokenType.NOT) 8249 self._match_text_seq("MATCHED") 8250 source = ( 8251 False 8252 if self._match_text_seq("BY", "TARGET") 8253 else self._match_text_seq("BY", "SOURCE") 8254 ) 8255 condition = self._parse_disjunction() if self._match(TokenType.AND) else None 8256 8257 self._match(TokenType.THEN) 8258 8259 if self._match(TokenType.INSERT): 8260 this = self._parse_star() 8261 if this: 8262 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 8263 else: 8264 then = self.expression( 8265 exp.Insert, 8266 this=exp.var("ROW") 8267 if self._match_text_seq("ROW") 8268 else self._parse_value(values=False), 8269 expression=self._match_text_seq("VALUES") and self._parse_value(), 8270 ) 8271 elif self._match(TokenType.UPDATE): 8272 
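# A bare star (UPDATE *) keeps the star as the update's expressions; otherwise the SET col = val, ... assignments are parsed in the branch below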
expressions = self._parse_star() 8273 if expressions: 8274 then = self.expression(exp.Update, expressions=expressions) 8275 else: 8276 then = self.expression( 8277 exp.Update, 8278 expressions=self._match(TokenType.SET) 8279 and self._parse_csv(self._parse_equality), 8280 ) 8281 elif self._match(TokenType.DELETE): 8282 then = self.expression(exp.Var, this=self._prev.text) 8283 else: 8284 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 8285 8286 whens.append( 8287 self.expression( 8288 exp.When, 8289 matched=matched, 8290 source=source, 8291 condition=condition, 8292 then=then, 8293 ) 8294 ) 8295 return self.expression(exp.Whens, expressions=whens) 8296 8297 def _parse_show(self) -> t.Optional[exp.Expression]: 8298 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8299 if parser: 8300 return parser(self) 8301 return self._parse_as_command(self._prev) 8302 8303 def _parse_set_item_assignment( 8304 self, kind: t.Optional[str] = None 8305 ) -> t.Optional[exp.Expression]: 8306 index = self._index 8307 8308 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8309 return self._parse_set_transaction(global_=kind == "GLOBAL") 8310 8311 left = self._parse_primary() or self._parse_column() 8312 assignment_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS) 8313 8314 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8315 self._retreat(index) 8316 return None 8317 8318 right = self._parse_statement() or self._parse_id_var() 8319 if isinstance(right, (exp.Column, exp.Identifier)): 8320 right = exp.var(right.name) 8321 8322 this = self.expression(exp.EQ, this=left, expression=right) 8323 return self.expression(exp.SetItem, this=this, kind=kind) 8324 8325 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8326 self._match_text_seq("TRANSACTION") 8327 characteristics = self._parse_csv( 8328 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8329 ) 8330 return self.expression( 8331 exp.SetItem, 8332 expressions=characteristics, 8333 kind="TRANSACTION", 8334 global_=global_, 8335 ) 8336 8337 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8338 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8339 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8340 8341 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8342 index = self._index 8343 set_ = self.expression( 8344 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8345 ) 8346 8347 if self._curr: 8348 self._retreat(index) 8349 return self._parse_as_command(self._prev) 8350 8351 return set_ 8352 8353 def _parse_var_from_options( 8354 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8355 ) -> t.Optional[exp.Var]: 8356 start = self._curr 8357 if not start: 8358 return None 8359 8360 option = start.text.upper() 8361 continuations = options.get(option) 8362 8363 index = self._index 8364 self._advance() 8365 for keywords in continuations or []: 8366 if isinstance(keywords, str): 8367 keywords = (keywords,) 8368 8369 if self._match_text_seq(*keywords): 8370 option = f"{option} {' '.join(keywords)}" 8371 break 8372 else: 8373 if continuations or continuations is None: 8374 if raise_unmatched: 8375 self.raise_error(f"Unknown option {option}") 8376 8377 self._retreat(index) 8378 return None 8379 8380 return exp.var(option) 8381 8382 def _parse_as_command(self, start: Token) -> exp.Command: 8383 while self._curr: 8384 
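# Consume every remaining token; the original SQL is then recovered verbatim and preserved as an exp.Command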
self._advance() 8385 text = self._find_sql(start, self._prev) 8386 size = len(start.text) 8387 self._warn_unsupported() 8388 return exp.Command(this=text[:size], expression=text[size:]) 8389 8390 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8391 settings = [] 8392 8393 self._match_l_paren() 8394 kind = self._parse_id_var() 8395 8396 if self._match(TokenType.L_PAREN): 8397 while True: 8398 key = self._parse_id_var() 8399 value = self._parse_primary() 8400 if not key and value is None: 8401 break 8402 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8403 self._match(TokenType.R_PAREN) 8404 8405 self._match_r_paren() 8406 8407 return self.expression( 8408 exp.DictProperty, 8409 this=this, 8410 kind=kind.this if kind else None, 8411 settings=settings, 8412 ) 8413 8414 def _parse_dict_range(self, this: str) -> exp.DictRange: 8415 self._match_l_paren() 8416 has_min = self._match_text_seq("MIN") 8417 if has_min: 8418 min = self._parse_var() or self._parse_primary() 8419 self._match_text_seq("MAX") 8420 max = self._parse_var() or self._parse_primary() 8421 else: 8422 max = self._parse_var() or self._parse_primary() 8423 min = exp.Literal.number(0) 8424 self._match_r_paren() 8425 return self.expression(exp.DictRange, this=this, min=min, max=max) 8426 8427 def _parse_comprehension( 8428 self, this: t.Optional[exp.Expression] 8429 ) -> t.Optional[exp.Comprehension]: 8430 index = self._index 8431 expression = self._parse_column() 8432 position = self._match(TokenType.COMMA) and self._parse_column() 8433 8434 if not self._match(TokenType.IN): 8435 self._retreat(index - 1) 8436 return None 8437 iterator = self._parse_column() 8438 condition = self._parse_disjunction() if self._match_text_seq("IF") else None 8439 return self.expression( 8440 exp.Comprehension, 8441 this=this, 8442 expression=expression, 8443 position=position, 8444 iterator=iterator, 8445 condition=condition, 8446 ) 8447 8448 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8449 if self._match(TokenType.HEREDOC_STRING): 8450 return self.expression(exp.Heredoc, this=self._prev.text) 8451 8452 if not self._match_text_seq("$"): 8453 return None 8454 8455 tags = ["$"] 8456 tag_text = None 8457 8458 if self._is_connected(): 8459 self._advance() 8460 tags.append(self._prev.text.upper()) 8461 else: 8462 self.raise_error("No closing $ found") 8463 8464 if tags[-1] != "$": 8465 if self._is_connected() and self._match_text_seq("$"): 8466 tag_text = tags[-1] 8467 tags.append("$") 8468 else: 8469 self.raise_error("No closing $ found") 8470 8471 heredoc_start = self._curr 8472 8473 while self._curr: 8474 if self._match_text_seq(*tags, advance=False): 8475 this = self._find_sql(heredoc_start, self._prev) 8476 self._advance(len(tags)) 8477 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8478 8479 self._advance() 8480 8481 self.raise_error(f"No closing {''.join(tags)} found") 8482 return None 8483 8484 def _find_parser( 8485 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8486 ) -> t.Optional[t.Callable]: 8487 if not self._curr: 8488 return None 8489 8490 index = self._index 8491 this = [] 8492 while True: 8493 # The current token might be multiple words 8494 curr = self._curr.text.upper() 8495 key = curr.split(" ") 8496 this.append(curr) 8497 8498 self._advance() 8499 result, trie = in_trie(trie, key) 8500 if result == TrieResult.FAILED: 8501 break 8502 8503 if result == TrieResult.EXISTS: 8504 subparser = parsers[" ".join(this)] 8505 return subparser 8506 8507 self._retreat(index) 
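# No (possibly multi-word) keyword sequence led to a parser in the trie, so rewind to the starting token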
8508 return None 8509 8510 def _match(self, token_type, advance=True, expression=None): 8511 if not self._curr: 8512 return None 8513 8514 if self._curr.token_type == token_type: 8515 if advance: 8516 self._advance() 8517 self._add_comments(expression) 8518 return True 8519 8520 return None 8521 8522 def _match_set(self, types, advance=True): 8523 if not self._curr: 8524 return None 8525 8526 if self._curr.token_type in types: 8527 if advance: 8528 self._advance() 8529 return True 8530 8531 return None 8532 8533 def _match_pair(self, token_type_a, token_type_b, advance=True): 8534 if not self._curr or not self._next: 8535 return None 8536 8537 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8538 if advance: 8539 self._advance(2) 8540 return True 8541 8542 return None 8543 8544 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8545 if not self._match(TokenType.L_PAREN, expression=expression): 8546 self.raise_error("Expecting (") 8547 8548 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8549 if not self._match(TokenType.R_PAREN, expression=expression): 8550 self.raise_error("Expecting )") 8551 8552 def _match_texts(self, texts, advance=True): 8553 if ( 8554 self._curr 8555 and self._curr.token_type != TokenType.STRING 8556 and self._curr.text.upper() in texts 8557 ): 8558 if advance: 8559 self._advance() 8560 return True 8561 return None 8562 8563 def _match_text_seq(self, *texts, advance=True): 8564 index = self._index 8565 for text in texts: 8566 if ( 8567 self._curr 8568 and self._curr.token_type != TokenType.STRING 8569 and self._curr.text.upper() == text 8570 ): 8571 self._advance() 8572 else: 8573 self._retreat(index) 8574 return None 8575 8576 if not advance: 8577 self._retreat(index) 8578 8579 return True 8580 8581 def _replace_lambda( 8582 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8583 ) -> t.Optional[exp.Expression]: 8584 if not node: 8585 return node 8586 8587 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8588 8589 for column in node.find_all(exp.Column): 8590 typ = lambda_types.get(column.parts[0].name) 8591 if typ is not None: 8592 dot_or_id = column.to_dot() if column.table else column.this 8593 8594 if typ: 8595 dot_or_id = self.expression( 8596 exp.Cast, 8597 this=dot_or_id, 8598 to=typ, 8599 ) 8600 8601 parent = column.parent 8602 8603 while isinstance(parent, exp.Dot): 8604 if not isinstance(parent.parent, exp.Dot): 8605 parent.replace(dot_or_id) 8606 break 8607 parent = parent.parent 8608 else: 8609 if column is node: 8610 node = dot_or_id 8611 else: 8612 column.replace(dot_or_id) 8613 return node 8614 8615 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8616 start = self._prev 8617 8618 # Not to be confused with TRUNCATE(number, decimals) function call 8619 if self._match(TokenType.L_PAREN): 8620 self._retreat(self._index - 2) 8621 return self._parse_function() 8622 8623 # Clickhouse supports TRUNCATE DATABASE as well 8624 is_database = self._match(TokenType.DATABASE) 8625 8626 self._match(TokenType.TABLE) 8627 8628 exists = self._parse_exists(not_=False) 8629 8630 expressions = self._parse_csv( 8631 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8632 ) 8633 8634 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8635 8636 if self._match_text_seq("RESTART", "IDENTITY"): 8637 identity = "RESTART" 8638 elif 
self._match_text_seq("CONTINUE", "IDENTITY"): 8639 identity = "CONTINUE" 8640 else: 8641 identity = None 8642 8643 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8644 option = self._prev.text 8645 else: 8646 option = None 8647 8648 partition = self._parse_partition() 8649 8650 # Fallback case 8651 if self._curr: 8652 return self._parse_as_command(start) 8653 8654 return self.expression( 8655 exp.TruncateTable, 8656 expressions=expressions, 8657 is_database=is_database, 8658 exists=exists, 8659 cluster=cluster, 8660 identity=identity, 8661 option=option, 8662 partition=partition, 8663 ) 8664 8665 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8666 this = self._parse_ordered(self._parse_opclass) 8667 8668 if not self._match(TokenType.WITH): 8669 return this 8670 8671 op = self._parse_var(any_token=True, tokens=self.RESERVED_TOKENS) 8672 8673 return self.expression(exp.WithOperator, this=this, op=op) 8674 8675 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8676 self._match(TokenType.EQ) 8677 self._match(TokenType.L_PAREN) 8678 8679 opts: t.List[t.Optional[exp.Expression]] = [] 8680 option: exp.Expression | None 8681 while self._curr and not self._match(TokenType.R_PAREN): 8682 if self._match_text_seq("FORMAT_NAME", "="): 8683 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8684 option = self._parse_format_name() 8685 else: 8686 option = self._parse_property() 8687 8688 if option is None: 8689 self.raise_error("Unable to parse option") 8690 break 8691 8692 opts.append(option) 8693 8694 return opts 8695 8696 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8697 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8698 8699 options = [] 8700 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8701 option = self._parse_var(any_token=True) 8702 prev = self._prev.text.upper() 8703 8704 # Different dialects might separate options and values by white space, "=" and "AS" 8705 self._match(TokenType.EQ) 8706 self._match(TokenType.ALIAS) 8707 8708 param = self.expression(exp.CopyParameter, this=option) 8709 8710 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8711 TokenType.L_PAREN, advance=False 8712 ): 8713 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8714 param.set("expressions", self._parse_wrapped_options()) 8715 elif prev == "FILE_FORMAT": 8716 # T-SQL's external file format case 8717 param.set("expression", self._parse_field()) 8718 elif ( 8719 prev == "FORMAT" 8720 and self._prev.token_type == TokenType.ALIAS 8721 and self._match_texts(("AVRO", "JSON")) 8722 ): 8723 param.set("this", exp.var(f"FORMAT AS {self._prev.text.upper()}")) 8724 param.set("expression", self._parse_field()) 8725 else: 8726 param.set("expression", self._parse_unquoted_field() or self._parse_bracket()) 8727 8728 options.append(param) 8729 self._match(sep) 8730 8731 return options 8732 8733 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8734 expr = self.expression(exp.Credentials) 8735 8736 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8737 expr.set("storage", self._parse_field()) 8738 if self._match_text_seq("CREDENTIALS"): 8739 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8740 creds = ( 8741 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8742 ) 8743 expr.set("credentials", creds) 8744 if self._match_text_seq("ENCRYPTION"): 8745 expr.set("encryption", 
    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            except_=self._parse_star_op("EXCEPT", "EXCLUDE"),
            replace=self._parse_star_op("REPLACE"),
            rename=self._parse_star_op("RENAME"),
        ).update_positions(star_token)
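    # Illustrative usage (not part of the source): _parse_star_ops handles the
    # BigQuery-style SELECT * EXCEPT/REPLACE modifiers. A minimal sketch:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ast = sqlglot.parse_one("SELECT * EXCEPT (a, b) FROM t", read="bigquery")
    #     star = ast.find(exp.Star)
    #     assert star is not None  # the EXCEPT columns land on the Star node's args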
    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until a comma (end of this privilege), ON
        # (end of the privilege list) or L_PAREN (start of a column list) is met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant_revoke_common(
        self,
    ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]:
        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable, e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        return privileges, kind, securable

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_revoke(self) -> exp.Revoke | exp.Command:
        start = self._prev

        grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("FROM"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        cascade = None
        if self._match_texts(("CASCADE", "RESTRICT")):
            cascade = self._prev.text.upper()

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Revoke,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
            cascade=cascade,
        )

    def _parse_overlay(self) -> exp.Overlay:
        def _parse_overlay_arg(text: str) -> t.Optional[exp.Expression]:
            return (
                self._match(TokenType.COMMA) or self._match_text_seq(text)
            ) and self._parse_bitwise()

        return self.expression(
            exp.Overlay,
            this=self._parse_bitwise(),
            expression=_parse_overlay_arg("PLACING"),
            from_=_parse_overlay_arg("FROM"),
            for_=_parse_overlay_arg("FOR"),
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_lambda()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_function_args())

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        return self.expression(exp.Identifier, token=token or self._prev, **kwargs)
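    # Illustrative usage (not part of the source): a minimal sketch of the
    # GRANT/REVOKE parsing above; statements that don't fit the model fall
    # back to a plain exp.Command:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ast = sqlglot.parse_one("GRANT SELECT, INSERT ON TABLE t TO ROLE analyst")
    #     assert isinstance(ast, exp.Grant)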
    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: t.List[exp.Expression],
        alias_cte: t.Optional[exp.TableAlias] = None,
    ) -> exp.Select:
        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
        if alias_cte:
            new_cte = alias_cte
        else:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

        with_ = query.args.get("with_")
        ctes = with_.pop() if with_ else None

        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
        if ctes:
            new_select.set("with_", ctes)

        return new_select.with_(new_cte, as_=query, copy=False)

    def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
        select = self._parse_select(consume_pipe=False)
        if not select:
            return query

        return self._build_pipe_cte(
            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
        )

    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if self._match_text_seq("GROUP", "AND", advance=False):
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element

            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])
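    # Illustrative usage (not part of the source): the pipe-syntax handlers above
    # rewrite each |> stage into a CTE. A minimal sketch, assuming BigQuery's
    # pipe syntax support in sqlglot:
    #
    #     import sqlglot
    #
    #     sql = "FROM t |> AGGREGATE COUNT(*) AS c GROUP BY x |> WHERE c > 1"
    #     print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])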
    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with_")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with_", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from_")
        if from_:
            from_.this.set("pivots", pivots)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        sample = self._parse_table_sample()

        with_ = query.args.get("with_")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query
    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from_"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and, if that fails, to then try to
                # parse a join operator. If that fails as well, the operator is not supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query

    def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        return self.expression(
            exp.DeclareItem,
            this=vars,
            kind=self._parse_types(),
            default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
        )

    def _parse_declare(self) -> exp.Declare | exp.Command:
        start = self._prev
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare, expressions=expressions)

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class, **kwargs)

    def _parse_json_value(self) -> exp.JSONValue:
        this = self._parse_bitwise()
        self._match(TokenType.COMMA)
        path = self._parse_bitwise()

        returning = self._match(TokenType.RETURNING) and self._parse_type()

        return self.expression(
            exp.JSONValue,
            this=this,
            path=self.dialect.to_json_path(path),
            returning=returning,
            on_condition=self._parse_on_condition(),
        )

    def _parse_group_concat(self) -> t.Optional[exp.Expression]:
        def concat_exprs(
            node: t.Optional[exp.Expression], exprs: t.List[exp.Expression]
        ) -> exp.Expression:
            if isinstance(node, exp.Distinct) and len(node.expressions) > 1:
                concat_exprs = [
                    self.expression(
                        exp.Concat,
                        expressions=node.expressions,
                        safe=True,
                        coalesce=self.dialect.CONCAT_COALESCE,
                    )
                ]
                node.set("expressions", concat_exprs)
                return node
            if len(exprs) == 1:
                return exprs[0]
            return self.expression(
                exp.Concat, expressions=exprs, safe=True, coalesce=self.dialect.CONCAT_COALESCE
            )

        args = self._parse_csv(self._parse_lambda)

        if args:
            order = args[-1] if isinstance(args[-1], exp.Order) else None

            if order:
                # ORDER BY is the last (or only) expression in the list and has consumed the
                # 'expr' before it, so remove 'expr' from exp.Order and add it back to args
                args[-1] = order.this
                order.set("this", concat_exprs(order.this, args))

            this = order or concat_exprs(args[0], args)
        else:
            this = None

        separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None

        return self.expression(exp.GroupConcat, this=this, separator=separator)

    def _parse_initcap(self) -> exp.Initcap:
        expr = exp.Initcap.from_arg_list(self._parse_function_args())

        # Attach the dialect's default delimiters
        if expr.args.get("expression") is None:
            expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS))

        return expr
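    # Illustrative usage (not part of the source): _parse_group_concat backs
    # MySQL's GROUP_CONCAT, including its ORDER BY and SEPARATOR clauses.
    # A minimal sketch:
    #
    #     import sqlglot
    #
    #     sql = "SELECT GROUP_CONCAT(name ORDER BY name SEPARATOR ', ') FROM t"
    #     print(sqlglot.transpile(sql, read="mysql", write="duckdb")[0])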
    def _parse_operator(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while True:
            if not self._match(TokenType.L_PAREN):
                break

            op = ""
            while self._curr and not self._match(TokenType.R_PAREN):
                op += self._curr.text
                self._advance()

            this = self.expression(
                exp.Operator,
                comments=self._prev_comments,
                this=this,
                operator=op,
                expression=self._parse_bitwise(),
            )

            if not self._match(TokenType.OPERATOR):
                break

        return this
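The PostgreSQL `OPERATOR(...)` syntax handled above can be exercised directly. A minimal sketch, assuming the Postgres dialect bundled with sqlglot:

    import sqlglot
    from sqlglot import exp

    ast = sqlglot.parse_one("SELECT a OPERATOR(pg_catalog.+) b", read="postgres")
    assert ast.find(exp.Operator) is not None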
def build_array_prepend(args: t.List, dialect: Dialect) -> exp.ArrayPrepend:
    """
    Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration.

    Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL.
    Others (DuckDB, PostgreSQL) create a new single-element array instead.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayPrepend expression with appropriate null_propagation flag
    """
    return exp.ArrayPrepend(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )
def build_array_concat(args: t.List, dialect: Dialect) -> exp.ArrayConcat:
    """
    Builds ArrayConcat with NULL propagation semantics based on the dialect configuration.

    Some dialects (Redshift, Snowflake) return NULL when any input array is NULL.
    Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation.

    Args:
        args: Function arguments [array1, array2, ...] (variadic)
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayConcat expression with appropriate null_propagation flag
    """
    return exp.ArrayConcat(
        this=seq_get(args, 0),
        expressions=args[1:],
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )
def build_array_remove(args: t.List, dialect: Dialect) -> exp.ArrayRemove:
    """
    Builds ArrayRemove with NULL propagation semantics based on the dialect configuration.

    Some dialects (Snowflake) return NULL when the removal value is NULL.
    Others (DuckDB) may return an empty array due to NULL comparison semantics.

    Args:
        args: Function arguments [array, value_to_remove]
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayRemove expression with appropriate null_propagation flag
    """
    return exp.ArrayRemove(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )
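Since null_propagation is resolved at parse time from the source dialect, transpiling between dialects with different semantics can change how the call is rendered. A minimal sketch, assuming the Snowflake and DuckDB dialects bundled with sqlglot:

    import sqlglot

    # Snowflake's ARRAY_APPEND returns NULL for a NULL array; DuckDB's does not,
    # so the generated SQL may differ to preserve the source semantics.
    print(sqlglot.transpile("SELECT ARRAY_APPEND(arr, 1)", read="snowflake", write="duckdb")[0])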
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
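    # Illustrative usage (not part of the source): a minimal sketch of driving the
    # Parser directly with the constructor arguments documented above; most callers
    # go through sqlglot.parse_one instead.
    #
    #     import sqlglot
    #     from sqlglot.errors import ErrorLevel
    #     from sqlglot.parser import Parser
    #
    #     parser = Parser(error_level=ErrorLevel.RAISE, max_errors=3)
    #     expressions = parser.parse(sqlglot.tokenize("SELECT 1"))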
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_APPEND": build_array_append,
        "ARRAY_CAT": build_array_concat,
        "ARRAY_CONCAT": build_array_concat,
        "ARRAY_PREPEND": build_array_prepend,
        "ARRAY_REMOVE": build_array_remove,
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GENERATE_UUID": lambda args, dialect: exp.Uuid(
            is_string=dialect.UUID_IS_STRING_TYPE or None
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "GREATEST": lambda args, dialect: exp.Greatest(
            this=seq_get(args, 0),
            expressions=args[1:],
            ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS,
        ),
        "LEAST": lambda args, dialect: exp.Least(
            this=seq_get(args, 0),
            expressions=args[1:],
            ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS,
        ),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_KEYS": lambda args, dialect: exp.JSONKeys(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        ),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None),
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
        TokenType.LOCALTIME: exp.Localtime,
        TokenType.LOCALTIMESTAMP: exp.Localtimestamp,
        TokenType.CURRENT_ROLE: exp.CurrentRole,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.FILE,
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT, TokenType.BOOLEAN, TokenType.TINYINT, TokenType.UTINYINT,
        TokenType.SMALLINT, TokenType.USMALLINT, TokenType.INT, TokenType.UINT,
        TokenType.BIGINT, TokenType.UBIGINT, TokenType.BIGNUM, TokenType.INT128,
        TokenType.UINT128, TokenType.INT256, TokenType.UINT256, TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT, TokenType.FIXEDSTRING, TokenType.FLOAT, TokenType.DOUBLE,
        TokenType.UDOUBLE, TokenType.CHAR, TokenType.NCHAR, TokenType.VARCHAR,
        TokenType.NVARCHAR, TokenType.BPCHAR, TokenType.TEXT, TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT, TokenType.BLOB, TokenType.MEDIUMBLOB, TokenType.LONGBLOB,
        TokenType.BINARY, TokenType.VARBINARY, TokenType.JSON, TokenType.JSONB,
        TokenType.INTERVAL, TokenType.TINYBLOB, TokenType.TINYTEXT, TokenType.TIME,
        TokenType.TIMETZ, TokenType.TIME_NS, TokenType.TIMESTAMP, TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS, TokenType.TIMESTAMP_NS, TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ, TokenType.TIMESTAMPNTZ, TokenType.DATETIME,
        TokenType.DATETIME2, TokenType.DATETIME64, TokenType.SMALLDATETIME, TokenType.DATE,
        TokenType.DATE32, TokenType.INT4RANGE, TokenType.INT4MULTIRANGE, TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE, TokenType.NUMRANGE, TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE, TokenType.TSMULTIRANGE, TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE, TokenType.DATERANGE, TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL, TokenType.DECIMAL32, TokenType.DECIMAL64, TokenType.DECIMAL128,
        TokenType.DECIMAL256, TokenType.DECFLOAT, TokenType.UDECIMAL, TokenType.BIGDECIMAL,
        TokenType.UUID, TokenType.GEOGRAPHY, TokenType.GEOGRAPHYPOINT, TokenType.GEOMETRY,
        TokenType.POINT, TokenType.RING, TokenType.LINESTRING, TokenType.MULTILINESTRING,
        TokenType.POLYGON, TokenType.MULTIPOLYGON, TokenType.HLLSKETCH, TokenType.HSTORE,
        TokenType.PSEUDO_TYPE, TokenType.SUPER, TokenType.SERIAL, TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL, TokenType.XML, TokenType.YEAR, TokenType.USERDEFINED,
        TokenType.MONEY, TokenType.SMALLMONEY, TokenType.ROWVERSION, TokenType.IMAGE,
        TokenType.VARIANT, TokenType.VECTOR, TokenType.VOID, TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER, TokenType.INET, TokenType.IPADDRESS, TokenType.IPPREFIX,
        TokenType.IPV4, TokenType.IPV6, TokenType.UNKNOWN, TokenType.NOTHING, TokenType.NULL,
        TokenType.NAME, TokenType.TDIGEST, TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE, TokenType.DICTIONARY, TokenType.FILE_FORMAT, TokenType.MODEL,
        TokenType.NAMESPACE, TokenType.SCHEMA, TokenType.SEMANTIC_VIEW, TokenType.SEQUENCE,
        TokenType.SINK, TokenType.SOURCE, TokenType.STAGE, TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT, TokenType.TABLE, TokenType.TAG, TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.SESSION,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL, TokenType.ANALYZE, TokenType.ATTACH, TokenType.VAR, TokenType.ANTI,
        TokenType.APPLY, TokenType.ASC, TokenType.ASOF, TokenType.AUTO_INCREMENT,
        TokenType.BEGIN, TokenType.BPCHAR, TokenType.CACHE, TokenType.CASE,
        TokenType.COLLATE, TokenType.COMMAND, TokenType.COMMENT, TokenType.COMMIT,
        TokenType.CONSTRAINT, TokenType.COPY, TokenType.CUBE, TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT, TokenType.DELETE, TokenType.DESC, TokenType.DESCRIBE,
        TokenType.DETACH, TokenType.DICTIONARY, TokenType.DIV, TokenType.END,
        TokenType.EXECUTE, TokenType.EXPORT, TokenType.ESCAPE, TokenType.FALSE,
        TokenType.FIRST, TokenType.FILTER, TokenType.FINAL, TokenType.FORMAT,
        TokenType.FULL, TokenType.GET, TokenType.IDENTIFIER, TokenType.INOUT,
        TokenType.IS, TokenType.ISNULL, TokenType.INTERVAL, TokenType.KEEP,
        TokenType.KILL, TokenType.LEFT, TokenType.LIMIT, TokenType.LOAD,
        TokenType.LOCK, TokenType.MATCH, TokenType.MERGE, TokenType.NATURAL,
        TokenType.NEXT, TokenType.OFFSET, TokenType.OPERATOR, TokenType.ORDINALITY,
        TokenType.OVER, TokenType.OVERLAPS, TokenType.OVERWRITE, TokenType.PARTITION,
        TokenType.PERCENT, TokenType.PIVOT, TokenType.PRAGMA, TokenType.PUT,
        TokenType.RANGE, TokenType.RECURSIVE, TokenType.REFERENCES, TokenType.REFRESH,
        TokenType.RENAME, TokenType.REPLACE, TokenType.RIGHT, TokenType.ROLLUP,
        TokenType.ROW, TokenType.ROWS, TokenType.SEMI, TokenType.SET,
        TokenType.SETTINGS, TokenType.SHOW, TokenType.TEMPORARY, TokenType.TOP,
        TokenType.TRUE, TokenType.TRUNCATE, TokenType.UNIQUE, TokenType.UNNEST,
        TokenType.UNPIVOT, TokenType.UPDATE, TokenType.USE, TokenType.VOLATILE,
        TokenType.WINDOW,
        *ALTERABLES,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE, TokenType.COMMAND, TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME, TokenType.CURRENT_SCHEMA, TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME, TokenType.CURRENT_USER, TokenType.CURRENT_CATALOG,
        TokenType.FILTER, TokenType.FIRST, TokenType.FORMAT, TokenType.GET,
        TokenType.GLOB, TokenType.IDENTIFIER, TokenType.INDEX, TokenType.ISNULL,
        TokenType.ILIKE, TokenType.INSERT, TokenType.LIKE, TokenType.LOCALTIME,
        TokenType.LOCALTIMESTAMP, TokenType.MERGE, TokenType.NEXT, TokenType.OFFSET,
        TokenType.PRIMARY_KEY, TokenType.RANGE, TokenType.REPLACE, TokenType.RLIKE,
        TokenType.ROW, TokenType.SESSION_USER, TokenType.UNNEST, TokenType.VAR,
        TokenType.LEFT, TokenType.RIGHT, TokenType.SEQUENCE, TokenType.DATE,
        TokenType.DATETIME, TokenType.TABLE, TokenType.TIMESTAMP, TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE, TokenType.UTC_DATE, TokenType.UTC_TIME, TokenType.UTC_TIMESTAMP,
        TokenType.WINDOW, TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }
    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_disjunction(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_disjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
            scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()),
        exp.Condition: lambda self: self._parse_disjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.GrantPrincipal: lambda self: self._parse_grant_principal(),
        exp.GrantPrivilege: lambda self: self._parse_grant_privilege(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
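    # Illustrative usage (not part of the source): COLUMN_OPERATORS above wires
    # tokens like -> and ->> to JSON extraction nodes. A minimal sketch:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ast = sqlglot.parse_one("SELECT payload -> 'a' ->> 'b' FROM t", read="postgres")
    #     assert ast.find(exp.JSONExtractScalar) is not None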
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDE: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }
    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(exp.RawString, token=token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(exp.National, token=token),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, token=token),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, token=token, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            token=token,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, token=token),
        TokenType.BYTE_STRING: lambda self, token: self.expression(
            exp.ByteString,
            token=token,
            is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None,
        ),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            token=token,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, token=token, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
        TokenType.ADJACENT: binary_range_parser(exp.Adjacent),
        TokenType.OPERATOR: lambda self, this: self._parse_operator(this),
        TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft),
        TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight),
    }
    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
lambda self, **kwargs: self._parse_fallback(**kwargs), 1147 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1148 "FREESPACE": lambda self: self._parse_freespace(), 1149 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1150 "HEAP": lambda self: self.expression(exp.HeapProperty), 1151 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1152 "IMMUTABLE": lambda self: self.expression( 1153 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1154 ), 1155 "INHERITS": lambda self: self.expression( 1156 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1157 ), 1158 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1159 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1160 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1161 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1162 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1163 "LIKE": lambda self: self._parse_create_like(), 1164 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1165 "LOCK": lambda self: self._parse_locking(), 1166 "LOCKING": lambda self: self._parse_locking(), 1167 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1168 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1169 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1170 "MODIFIES": lambda self: self._parse_modifies_property(), 1171 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1172 "NO": lambda self: self._parse_no_property(), 1173 "ON": lambda self: self._parse_on_property(), 1174 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1175 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1176 "PARTITION": lambda self: self._parse_partitioned_of(), 1177 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1178 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1179 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1180 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1181 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1182 "READS": lambda self: self._parse_reads_property(), 1183 "REMOTE": lambda self: self._parse_remote_with_connection(), 1184 "RETURNS": lambda self: self._parse_returns(), 1185 "STRICT": lambda self: self.expression(exp.StrictProperty), 1186 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1187 "ROW": lambda self: self._parse_row(), 1188 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1189 "SAMPLE": lambda self: self.expression( 1190 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1191 ), 1192 "SECURE": lambda self: self.expression(exp.SecureProperty), 1193 "SECURITY": lambda self: self._parse_security(), 1194 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1195 "SETTINGS": lambda self: self._parse_settings_property(), 1196 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1197 "SORTKEY": lambda self: self._parse_sortkey(), 1198 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1199 "STABLE": lambda self: self.expression( 1200 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1201 ), 1202 "STORED": lambda self: self._parse_stored(), 1203 "SYSTEM_VERSIONING": lambda self: 
self._parse_system_versioning_property(), 1204 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1205 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1206 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1207 "TO": lambda self: self._parse_to_table(), 1208 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1209 "TRANSFORM": lambda self: self.expression( 1210 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1211 ), 1212 "TTL": lambda self: self._parse_ttl(), 1213 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1214 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1215 "VOLATILE": lambda self: self._parse_volatile_property(), 1216 "WITH": lambda self: self._parse_with_property(), 1217 } 1218 1219 CONSTRAINT_PARSERS = { 1220 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1221 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1222 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1223 "CHARACTER SET": lambda self: self.expression( 1224 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1225 ), 1226 "CHECK": lambda self: self._parse_check_constraint(), 1227 "COLLATE": lambda self: self.expression( 1228 exp.CollateColumnConstraint, 1229 this=self._parse_identifier() or self._parse_column(), 1230 ), 1231 "COMMENT": lambda self: self.expression( 1232 exp.CommentColumnConstraint, this=self._parse_string() 1233 ), 1234 "COMPRESS": lambda self: self._parse_compress(), 1235 "CLUSTERED": lambda self: self.expression( 1236 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1237 ), 1238 "NONCLUSTERED": lambda self: self.expression( 1239 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1240 ), 1241 "DEFAULT": lambda self: self.expression( 1242 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1243 ), 1244 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1245 "EPHEMERAL": lambda self: self.expression( 1246 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1247 ), 1248 "EXCLUDE": lambda self: self.expression( 1249 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1250 ), 1251 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1252 "FORMAT": lambda self: self.expression( 1253 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1254 ), 1255 "GENERATED": lambda self: self._parse_generated_as_identity(), 1256 "IDENTITY": lambda self: self._parse_auto_increment(), 1257 "INLINE": lambda self: self._parse_inline(), 1258 "LIKE": lambda self: self._parse_create_like(), 1259 "NOT": lambda self: self._parse_not_constraint(), 1260 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1261 "ON": lambda self: ( 1262 self._match(TokenType.UPDATE) 1263 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1264 ) 1265 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1266 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1267 "PERIOD": lambda self: self._parse_period_for_system_time(), 1268 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1269 "REFERENCES": lambda self: self._parse_references(match=False), 1270 "TITLE": lambda self: self.expression( 1271 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1272 ), 1273 "TTL": lambda 
self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1274 "UNIQUE": lambda self: self._parse_unique(), 1275 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1276 "WITH": lambda self: self.expression( 1277 exp.Properties, expressions=self._parse_wrapped_properties() 1278 ), 1279 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1280 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1281 } 1282 1283 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1284 if not self._match(TokenType.L_PAREN, advance=False): 1285 # Partitioning by bucket or truncate follows the syntax: 1286 # PARTITION BY (BUCKET(..) | TRUNCATE(..)) 1287 # If we don't have parentheses after each keyword, we should instead parse this as an identifier 1288 self._retreat(self._index - 1) 1289 return None 1290 1291 klass = ( 1292 exp.PartitionedByBucket 1293 if self._prev.text.upper() == "BUCKET" 1294 else exp.PartitionByTruncate 1295 ) 1296 1297 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1298 this, expression = seq_get(args, 0), seq_get(args, 1) 1299 1300 if isinstance(this, exp.Literal): 1301 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1302 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1303 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1304 # Both variants are canonicalized into the latter, i.e. `bucket(<col name>, <num buckets>)` 1305 # 1306 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1307 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1308 this, expression = expression, this 1309 1310 return self.expression(klass, this=this, expression=expression) 1311 1312 ALTER_PARSERS = { 1313 "ADD": lambda self: self._parse_alter_table_add(), 1314 "AS": lambda self: self._parse_select(), 1315 "ALTER": lambda self: self._parse_alter_table_alter(), 1316 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1317 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1318 "DROP": lambda self: self._parse_alter_table_drop(), 1319 "RENAME": lambda self: self._parse_alter_table_rename(), 1320 "SET": lambda self: self._parse_alter_table_set(), 1321 "SWAP": lambda self: self.expression( 1322 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1323 ), 1324 } 1325 1326 ALTER_ALTER_PARSERS = { 1327 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1328 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1329 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1330 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1331 } 1332 1333 SCHEMA_UNNAMED_CONSTRAINTS = { 1334 "CHECK", 1335 "EXCLUDE", 1336 "FOREIGN KEY", 1337 "LIKE", 1338 "PERIOD", 1339 "PRIMARY KEY", 1340 "UNIQUE", 1341 "BUCKET", 1342 "TRUNCATE", 1343 } 1344 1345 NO_PAREN_FUNCTION_PARSERS = { 1346 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1347 "CASE": lambda self: self._parse_case(), 1348 "CONNECT_BY_ROOT": lambda self: self.expression( 1349 exp.ConnectByRoot, this=self._parse_column() 1350 ), 1351 "IF": lambda self: self._parse_if(), 1352 } 1353 1354 INVALID_FUNC_NAME_TOKENS = { 1355 TokenType.IDENTIFIER,
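# Illustrative annotation: tokens in this set may not serve as a function
# name, so e.g. the string literal in `'foo'(x)` is not treated as a call.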
1356 TokenType.STRING, 1357 } 1358 1359 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1360 1361 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1362 1363 FUNCTION_PARSERS = { 1364 **{ 1365 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1366 }, 1367 **{ 1368 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1369 }, 1370 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1371 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1372 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1373 "CHAR": lambda self: self._parse_char(), 1374 "CHR": lambda self: self._parse_char(), 1375 "DECODE": lambda self: self._parse_decode(), 1376 "EXTRACT": lambda self: self._parse_extract(), 1377 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1378 "GAP_FILL": lambda self: self._parse_gap_fill(), 1379 "INITCAP": lambda self: self._parse_initcap(), 1380 "JSON_OBJECT": lambda self: self._parse_json_object(), 1381 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1382 "JSON_TABLE": lambda self: self._parse_json_table(), 1383 "MATCH": lambda self: self._parse_match_against(), 1384 "NORMALIZE": lambda self: self._parse_normalize(), 1385 "OPENJSON": lambda self: self._parse_open_json(), 1386 "OVERLAY": lambda self: self._parse_overlay(), 1387 "POSITION": lambda self: self._parse_position(), 1388 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1389 "STRING_AGG": lambda self: self._parse_string_agg(), 1390 "SUBSTRING": lambda self: self._parse_substring(), 1391 "TRIM": lambda self: self._parse_trim(), 1392 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1393 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1394 "XMLELEMENT": lambda self: self._parse_xml_element(), 1395 "XMLTABLE": lambda self: self._parse_xml_table(), 1396 } 1397 1398 QUERY_MODIFIER_PARSERS = { 1399 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1400 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1401 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1402 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1403 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1404 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1405 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1406 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1407 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1408 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1409 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1410 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1411 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1412 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1413 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1414 TokenType.CLUSTER_BY: lambda self: ( 1415 "cluster", 1416 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1417 ), 1418 TokenType.DISTRIBUTE_BY: lambda self: ( 1419 "distribute", 1420 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1421 ), 1422 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1423 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1424 TokenType.START_WITH: lambda 
self: ("connect", self._parse_connect()), 1425 } 1426 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1427 1428 SET_PARSERS = { 1429 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1430 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1431 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1432 "TRANSACTION": lambda self: self._parse_set_transaction(), 1433 } 1434 1435 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1436 1437 TYPE_LITERAL_PARSERS = { 1438 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1439 } 1440 1441 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1442 1443 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1444 1445 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1446 1447 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1448 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1449 "ISOLATION": ( 1450 ("LEVEL", "REPEATABLE", "READ"), 1451 ("LEVEL", "READ", "COMMITTED"), 1452 ("LEVEL", "READ", "UNCOMITTED"), 1453 ("LEVEL", "SERIALIZABLE"), 1454 ), 1455 "READ": ("WRITE", "ONLY"), 1456 } 1457 1458 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1459 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1460 ) 1461 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1462 1463 CREATE_SEQUENCE: OPTIONS_TYPE = { 1464 "SCALE": ("EXTEND", "NOEXTEND"), 1465 "SHARD": ("EXTEND", "NOEXTEND"), 1466 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1467 **dict.fromkeys( 1468 ( 1469 "SESSION", 1470 "GLOBAL", 1471 "KEEP", 1472 "NOKEEP", 1473 "ORDER", 1474 "NOORDER", 1475 "NOCACHE", 1476 "CYCLE", 1477 "NOCYCLE", 1478 "NOMINVALUE", 1479 "NOMAXVALUE", 1480 "NOSCALE", 1481 "NOSHARD", 1482 ), 1483 tuple(), 1484 ), 1485 } 1486 1487 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1488 1489 USABLES: OPTIONS_TYPE = dict.fromkeys( 1490 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1491 ) 1492 1493 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1494 1495 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1496 "TYPE": ("EVOLUTION",), 1497 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1498 } 1499 1500 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1501 1502 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1503 1504 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1505 "NOT": ("ENFORCED",), 1506 "MATCH": ( 1507 "FULL", 1508 "PARTIAL", 1509 "SIMPLE", 1510 ), 1511 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1512 "USING": ( 1513 "BTREE", 1514 "HASH", 1515 ), 1516 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1517 } 1518 1519 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1520 "NO": ("OTHERS",), 1521 "CURRENT": ("ROW",), 1522 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1523 } 1524 1525 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1526 1527 CLONE_KEYWORDS = {"CLONE", "COPY"} 1528 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1529 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1530 1531 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1532 1533 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1534 1535 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1536 1537 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1538 1539 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.RANGE, 
TokenType.ROWS} 1540 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1541 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1542 1543 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1544 1545 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1546 1547 ADD_CONSTRAINT_TOKENS = { 1548 TokenType.CONSTRAINT, 1549 TokenType.FOREIGN_KEY, 1550 TokenType.INDEX, 1551 TokenType.KEY, 1552 TokenType.PRIMARY_KEY, 1553 TokenType.UNIQUE, 1554 } 1555 1556 DISTINCT_TOKENS = {TokenType.DISTINCT} 1557 1558 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1559 1560 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1561 1562 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1563 1564 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1565 1566 ODBC_DATETIME_LITERALS: t.Dict[str, t.Type[exp.Expression]] = {} 1567 1568 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1569 1570 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1571 1572 # The style options for the DESCRIBE statement 1573 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1574 1575 SET_ASSIGNMENT_DELIMITERS = {"=", ":=", "TO"} 1576 1577 # The style options for the ANALYZE statement 1578 ANALYZE_STYLES = { 1579 "BUFFER_USAGE_LIMIT", 1580 "FULL", 1581 "LOCAL", 1582 "NO_WRITE_TO_BINLOG", 1583 "SAMPLE", 1584 "SKIP_LOCKED", 1585 "VERBOSE", 1586 } 1587 1588 ANALYZE_EXPRESSION_PARSERS = { 1589 "ALL": lambda self: self._parse_analyze_columns(), 1590 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1591 "DELETE": lambda self: self._parse_analyze_delete(), 1592 "DROP": lambda self: self._parse_analyze_histogram(), 1593 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1594 "LIST": lambda self: self._parse_analyze_list(), 1595 "PREDICATE": lambda self: self._parse_analyze_columns(), 1596 "UPDATE": lambda self: self._parse_analyze_histogram(), 1597 "VALIDATE": lambda self: self._parse_analyze_validate(), 1598 } 1599 1600 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1601 1602 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1603 1604 OPERATION_MODIFIERS: t.Set[str] = set() 1605 1606 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1607 1608 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows, exp.Values) 1609 1610 STRICT_CAST = True 1611 1612 PREFIXED_PIVOT_COLUMNS = False 1613 IDENTIFY_PIVOT_STRINGS = False 1614 1615 LOG_DEFAULTS_TO_LN = False 1616 1617 # Whether the table sample clause expects CSV syntax 1618 TABLESAMPLE_CSV = False 1619 1620 # The default method used for table sampling 1621 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1622 1623 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1624 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1625 1626 # Whether the TRIM function expects the characters to trim as its first argument 1627 TRIM_PATTERN_FIRST = False 1628 1629 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1630 STRING_ALIASES = False 1631 1632 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1633 MODIFIERS_ATTACHED_TO_SET_OP = True 1634 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1635 1636 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1637 NO_PAREN_IF_COMMANDS = True 1638 1639 # Whether the -> and ->> operators expect documents of type JSON (e.g. 
Postgres) 1640 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1641 1642 # Whether the `:` operator is used to extract a value from a VARIANT column 1643 COLON_IS_VARIANT_EXTRACT = False 1644 1645 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1646 # If this is True and '(' is not found, the keyword will be treated as an identifier 1647 VALUES_FOLLOWED_BY_PAREN = True 1648 1649 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1650 SUPPORTS_IMPLICIT_UNNEST = False 1651 1652 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTH 1653 INTERVAL_SPANS = True 1654 1655 # Whether a PARTITION clause can follow a table reference 1656 SUPPORTS_PARTITION_SELECTION = False 1657 1658 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1659 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1660 1661 # Whether the 'AS' keyword is optional in the CTE definition syntax 1662 OPTIONAL_ALIAS_TOKEN_CTE = True 1663 1664 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1665 ALTER_RENAME_REQUIRES_COLUMN = True 1666 1667 # Whether ALTER statements are allowed to contain partition specifications 1668 ALTER_TABLE_PARTITIONS = False 1669 1670 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1671 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1672 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1673 # as BigQuery, where all joins have the same precedence. 1674 JOINS_HAVE_EQUAL_PRECEDENCE = False 1675 1676 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1677 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1678 1679 # Whether map literals support arbitrary expressions as keys. 1680 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1681 # When False, keys are typically restricted to identifiers.
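#
# Illustrative sketch (not part of this module): dialects tune the flags above
# by overriding them in their Parser subclass, e.g. a hypothetical dialect
# whose joins all share one precedence level and whose LOG() defaults to LN()
# would declare:
#
#     class MyDialect(Dialect):
#         class Parser(parser.Parser):
#             JOINS_HAVE_EQUAL_PRECEDENCE = True
#             LOG_DEFAULTS_TO_LN = True
#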
1682 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1683 1684 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this 1685 # is true for Snowflake but not for BigQuery, which can also process strings 1686 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1687 1688 # Dialects like Databricks support JOINs without join criteria 1689 # Adding an ON TRUE makes transpilation semantically correct for other dialects 1690 ADD_JOIN_ON_TRUE = False 1691 1692 # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]' 1693 # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND` 1694 SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False 1695 1696 __slots__ = ( 1697 "error_level", 1698 "error_message_context", 1699 "max_errors", 1700 "dialect", 1701 "sql", 1702 "errors", 1703 "_tokens", 1704 "_index", 1705 "_curr", 1706 "_next", 1707 "_prev", 1708 "_prev_comments", 1709 "_pipe_cte_counter", 1710 ) 1711 1712 # Autofilled 1713 SHOW_TRIE: t.Dict = {} 1714 SET_TRIE: t.Dict = {} 1715 1716 def __init__( 1717 self, 1718 error_level: t.Optional[ErrorLevel] = None, 1719 error_message_context: int = 100, 1720 max_errors: int = 3, 1721 dialect: DialectType = None, 1722 ): 1723 from sqlglot.dialects import Dialect 1724 1725 self.error_level = error_level or ErrorLevel.IMMEDIATE 1726 self.error_message_context = error_message_context 1727 self.max_errors = max_errors 1728 self.dialect = Dialect.get_or_raise(dialect) 1729 self.reset() 1730 1731 def reset(self): 1732 self.sql = "" 1733 self.errors = [] 1734 self._tokens = [] 1735 self._index = 0 1736 self._curr = None 1737 self._next = None 1738 self._prev = None 1739 self._prev_comments = None 1740 self._pipe_cte_counter = 0 1741 1742 def parse( 1743 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1744 ) -> t.List[t.Optional[exp.Expression]]: 1745 """ 1746 Parses a list of tokens and returns a list of syntax trees, one tree 1747 per parsed SQL statement. 1748 1749 Args: 1750 raw_tokens: The list of tokens. 1751 sql: The original SQL string, used to produce helpful debug messages. 1752 1753 Returns: 1754 The list of the produced syntax trees. 1755 """ 1756 return self._parse( 1757 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1758 ) 1759 1760 def parse_into( 1761 self, 1762 expression_types: exp.IntoType, 1763 raw_tokens: t.List[Token], 1764 sql: t.Optional[str] = None, 1765 ) -> t.List[t.Optional[exp.Expression]]: 1766 """ 1767 Parses a list of tokens into a given Expression type. If a collection of Expression 1768 types is given instead, this method will try to parse the token list into each one 1769 of them, stopping at the first for which the parsing succeeds. 1770 1771 Args: 1772 expression_types: The expression type(s) to try and parse the token list into. 1773 raw_tokens: The list of tokens. 1774 sql: The original SQL string, used to produce helpful debug messages. 1775 1776 Returns: 1777 The target Expression.
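Example (illustrative sketch; this method is what backs `sqlglot.parse_one(sql, into=...)`, assuming the default dialect):
    >>> import sqlglot
    >>> from sqlglot import exp
    >>> sqlglot.parse_one("SELECT 1", into=exp.Select).sql()
    'SELECT 1'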
1778 """ 1779 errors = [] 1780 for expression_type in ensure_list(expression_types): 1781 parser = self.EXPRESSION_PARSERS.get(expression_type) 1782 if not parser: 1783 raise TypeError(f"No parser registered for {expression_type}") 1784 1785 try: 1786 return self._parse(parser, raw_tokens, sql) 1787 except ParseError as e: 1788 e.errors[0]["into_expression"] = expression_type 1789 errors.append(e) 1790 1791 raise ParseError( 1792 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1793 errors=merge_errors(errors), 1794 ) from errors[-1] 1795 1796 def _parse( 1797 self, 1798 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1799 raw_tokens: t.List[Token], 1800 sql: t.Optional[str] = None, 1801 ) -> t.List[t.Optional[exp.Expression]]: 1802 self.reset() 1803 self.sql = sql or "" 1804 1805 total = len(raw_tokens) 1806 chunks: t.List[t.List[Token]] = [[]] 1807 1808 for i, token in enumerate(raw_tokens): 1809 if token.token_type == TokenType.SEMICOLON: 1810 if token.comments: 1811 chunks.append([token]) 1812 1813 if i < total - 1: 1814 chunks.append([]) 1815 else: 1816 chunks[-1].append(token) 1817 1818 expressions = [] 1819 1820 for tokens in chunks: 1821 self._index = -1 1822 self._tokens = tokens 1823 self._advance() 1824 1825 expressions.append(parse_method(self)) 1826 1827 if self._index < len(self._tokens): 1828 self.raise_error("Invalid expression / Unexpected token") 1829 1830 self.check_errors() 1831 1832 return expressions 1833 1834 def check_errors(self) -> None: 1835 """Logs or raises any found errors, depending on the chosen error level setting.""" 1836 if self.error_level == ErrorLevel.WARN: 1837 for error in self.errors: 1838 logger.error(str(error)) 1839 elif self.error_level == ErrorLevel.RAISE and self.errors: 1840 raise ParseError( 1841 concat_messages(self.errors, self.max_errors), 1842 errors=merge_errors(self.errors), 1843 ) 1844 1845 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1846 """ 1847 Appends an error in the list of recorded errors or raises it, depending on the chosen 1848 error level setting. 1849 """ 1850 token = token or self._curr or self._prev or Token.string("") 1851 formatted_sql, start_context, highlight, end_context = highlight_sql( 1852 sql=self.sql, 1853 positions=[(token.start, token.end)], 1854 context_length=self.error_message_context, 1855 ) 1856 formatted_message = f"{message}. Line {token.line}, Col: {token.col}.\n {formatted_sql}" 1857 1858 error = ParseError.new( 1859 formatted_message, 1860 description=message, 1861 line=token.line, 1862 col=token.col, 1863 start_context=start_context, 1864 highlight=highlight, 1865 end_context=end_context, 1866 ) 1867 1868 if self.error_level == ErrorLevel.IMMEDIATE: 1869 raise error 1870 1871 self.errors.append(error) 1872 1873 def expression( 1874 self, 1875 exp_class: t.Type[E], 1876 token: t.Optional[Token] = None, 1877 comments: t.Optional[t.List[str]] = None, 1878 **kwargs, 1879 ) -> E: 1880 """ 1881 Creates a new, validated Expression. 1882 1883 Args: 1884 exp_class: The expression class to instantiate. 1885 comments: An optional list of comments to attach to the expression. 1886 kwargs: The arguments to set for the expression along with their respective values. 1887 1888 Returns: 1889 The target expression. 
1890 """ 1891 if token: 1892 instance = exp_class(this=token.text, **kwargs) 1893 instance.update_positions(token) 1894 else: 1895 instance = exp_class(**kwargs) 1896 instance.add_comments(comments) if comments else self._add_comments(instance) 1897 return self.validate_expression(instance) 1898 1899 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1900 if expression and self._prev_comments: 1901 expression.add_comments(self._prev_comments) 1902 self._prev_comments = None 1903 1904 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1905 """ 1906 Validates an Expression, making sure that all its mandatory arguments are set. 1907 1908 Args: 1909 expression: The expression to validate. 1910 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1911 1912 Returns: 1913 The validated expression. 1914 """ 1915 if self.error_level != ErrorLevel.IGNORE: 1916 for error_message in expression.error_messages(args): 1917 self.raise_error(error_message) 1918 1919 return expression 1920 1921 def _find_sql(self, start: Token, end: Token) -> str: 1922 return self.sql[start.start : end.end + 1] 1923 1924 def _is_connected(self) -> bool: 1925 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1926 1927 def _advance(self, times: int = 1) -> None: 1928 self._index += times 1929 self._curr = seq_get(self._tokens, self._index) 1930 self._next = seq_get(self._tokens, self._index + 1) 1931 1932 if self._index > 0: 1933 self._prev = self._tokens[self._index - 1] 1934 self._prev_comments = self._prev.comments 1935 else: 1936 self._prev = None 1937 self._prev_comments = None 1938 1939 def _retreat(self, index: int) -> None: 1940 if index != self._index: 1941 self._advance(index - self._index) 1942 1943 def _warn_unsupported(self) -> None: 1944 if len(self._tokens) <= 1: 1945 return 1946 1947 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1948 # interested in emitting a warning for the one being currently processed. 1949 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1950 1951 logger.warning( 1952 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1953 ) 1954 1955 def _parse_command(self) -> exp.Command: 1956 self._warn_unsupported() 1957 return self.expression( 1958 exp.Command, 1959 comments=self._prev_comments, 1960 this=self._prev.text.upper(), 1961 expression=self._parse_string(), 1962 ) 1963 1964 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1965 """ 1966 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1970 index = self._index 1971 error_level = self.error_level 1972 1973 self.error_level = ErrorLevel.IMMEDIATE 1974 try: 1975 this = parse_method() 1976 except ParseError: 1977 this = None 1978 finally: 1979 if not this or retreat: 1980 self._retreat(index) 1981 self.error_level = error_level 1982 1983 return this 1984 1985 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1986 start = self._prev 1987 exists = self._parse_exists() if allow_exists else None 1988 1989 self._match(TokenType.ON) 1990 1991 materialized = self._match_text_seq("MATERIALIZED") 1992 kind = self._match_set(self.CREATABLES) and self._prev 1993 if not kind: 1994 return self._parse_as_command(start) 1995 1996 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1997 this = self._parse_user_defined_function(kind=kind.token_type) 1998 elif kind.token_type == TokenType.TABLE: 1999 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 2000 elif kind.token_type == TokenType.COLUMN: 2001 this = self._parse_column() 2002 else: 2003 this = self._parse_id_var() 2004 2005 self._match(TokenType.IS) 2006 2007 return self.expression( 2008 exp.Comment, 2009 this=this, 2010 kind=kind.text, 2011 expression=self._parse_string(), 2012 exists=exists, 2013 materialized=materialized, 2014 ) 2015 2016 def _parse_to_table( 2017 self, 2018 ) -> exp.ToTableProperty: 2019 table = self._parse_table_parts(schema=True) 2020 return self.expression(exp.ToTableProperty, this=table) 2021 2022 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 2023 def _parse_ttl(self) -> exp.Expression: 2024 def _parse_ttl_action() -> t.Optional[exp.Expression]: 2025 this = self._parse_bitwise() 2026 2027 if self._match_text_seq("DELETE"): 2028 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 2029 if self._match_text_seq("RECOMPRESS"): 2030 return self.expression( 2031 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 2032 ) 2033 if self._match_text_seq("TO", "DISK"): 2034 return self.expression( 2035 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 2036 ) 2037 if self._match_text_seq("TO", "VOLUME"): 2038 return self.expression( 2039 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 2040 ) 2041 2042 return this 2043 2044 expressions = self._parse_csv(_parse_ttl_action) 2045 where = self._parse_where() 2046 group = self._parse_group() 2047 2048 aggregates = None 2049 if group and self._match(TokenType.SET): 2050 aggregates = self._parse_csv(self._parse_set_item) 2051 2052 return self.expression( 2053 exp.MergeTreeTTL, 2054 expressions=expressions, 2055 where=where, 2056 group=group, 2057 aggregates=aggregates, 2058 ) 2059 2060 def _parse_statement(self) -> t.Optional[exp.Expression]: 2061 if self._curr is None: 2062 return None 2063 2064 if self._match_set(self.STATEMENT_PARSERS): 2065 comments = self._prev_comments 2066 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 2067 stmt.add_comments(comments, prepend=True) 2068 return stmt 2069 2070 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 2071 return self._parse_command() 2072 2073 expression = self._parse_expression() 2074 expression = self._parse_set_operations(expression) if expression else self._parse_select() 2075 return
self._parse_query_modifiers(expression) 2076 2077 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 2078 start = self._prev 2079 temporary = self._match(TokenType.TEMPORARY) 2080 materialized = self._match_text_seq("MATERIALIZED") 2081 2082 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 2083 if not kind: 2084 return self._parse_as_command(start) 2085 2086 concurrently = self._match_text_seq("CONCURRENTLY") 2087 if_exists = exists or self._parse_exists() 2088 2089 if kind == "COLUMN": 2090 this = self._parse_column() 2091 else: 2092 this = self._parse_table_parts( 2093 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 2094 ) 2095 2096 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 2097 2098 if self._match(TokenType.L_PAREN, advance=False): 2099 expressions = self._parse_wrapped_csv(self._parse_types) 2100 else: 2101 expressions = None 2102 2103 return self.expression( 2104 exp.Drop, 2105 exists=if_exists, 2106 this=this, 2107 expressions=expressions, 2108 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 2109 temporary=temporary, 2110 materialized=materialized, 2111 cascade=self._match_text_seq("CASCADE"), 2112 constraints=self._match_text_seq("CONSTRAINTS"), 2113 purge=self._match_text_seq("PURGE"), 2114 cluster=cluster, 2115 concurrently=concurrently, 2116 ) 2117 2118 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 2119 return ( 2120 self._match_text_seq("IF") 2121 and (not not_ or self._match(TokenType.NOT)) 2122 and self._match(TokenType.EXISTS) 2123 ) 2124 2125 def _parse_create(self) -> exp.Create | exp.Command: 2126 # Note: this can't be None because we've matched a statement parser 2127 start = self._prev 2128 2129 replace = ( 2130 start.token_type == TokenType.REPLACE 2131 or self._match_pair(TokenType.OR, TokenType.REPLACE) 2132 or self._match_pair(TokenType.OR, TokenType.ALTER) 2133 ) 2134 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 2135 2136 unique = self._match(TokenType.UNIQUE) 2137 2138 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2139 clustered = True 2140 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2141 "COLUMNSTORE" 2142 ): 2143 clustered = False 2144 else: 2145 clustered = None 2146 2147 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2148 self._advance() 2149 2150 properties = None 2151 create_token = self._match_set(self.CREATABLES) and self._prev 2152 2153 if not create_token: 2154 # exp.Properties.Location.POST_CREATE 2155 properties = self._parse_properties() 2156 create_token = self._match_set(self.CREATABLES) and self._prev 2157 2158 if not properties or not create_token: 2159 return self._parse_as_command(start) 2160 2161 concurrently = self._match_text_seq("CONCURRENTLY") 2162 exists = self._parse_exists(not_=True) 2163 this = None 2164 expression: t.Optional[exp.Expression] = None 2165 indexes = None 2166 no_schema_binding = None 2167 begin = None 2168 end = None 2169 clone = None 2170 2171 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2172 nonlocal properties 2173 if properties and temp_props: 2174 properties.expressions.extend(temp_props.expressions) 2175 elif temp_props: 2176 properties = temp_props 2177 2178 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2179 this = self._parse_user_defined_function(kind=create_token.token_type) 2180 2181 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2182 extend_props(self._parse_properties()) 2183 2184 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2185 extend_props(self._parse_properties()) 2186 2187 if not expression: 2188 if self._match(TokenType.COMMAND): 2189 expression = self._parse_as_command(self._prev) 2190 else: 2191 begin = self._match(TokenType.BEGIN) 2192 return_ = self._match_text_seq("RETURN") 2193 2194 if self._match(TokenType.STRING, advance=False): 2195 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2196 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2197 expression = self._parse_string() 2198 extend_props(self._parse_properties()) 2199 else: 2200 expression = self._parse_user_defined_function_expression() 2201 2202 end = self._match_text_seq("END") 2203 2204 if return_: 2205 expression = self.expression(exp.Return, this=expression) 2206 elif create_token.token_type == TokenType.INDEX: 2207 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2208 if not self._match(TokenType.ON): 2209 index = self._parse_id_var() 2210 anonymous = False 2211 else: 2212 index = None 2213 anonymous = True 2214 2215 this = self._parse_index(index=index, anonymous=anonymous) 2216 elif create_token.token_type in self.DB_CREATABLES: 2217 table_parts = self._parse_table_parts( 2218 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2219 ) 2220 2221 # exp.Properties.Location.POST_NAME 2222 self._match(TokenType.COMMA) 2223 extend_props(self._parse_properties(before=True)) 2224 2225 this = self._parse_schema(this=table_parts) 2226 2227 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2228 extend_props(self._parse_properties()) 2229 2230 has_alias = self._match(TokenType.ALIAS) 2231 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2232 # exp.Properties.Location.POST_ALIAS 2233 extend_props(self._parse_properties()) 2234 2235 if create_token.token_type == TokenType.SEQUENCE: 2236 expression = self._parse_types() 2237 props = self._parse_properties() 2238 if props: 2239 sequence_props = exp.SequenceProperties() 2240 options = [] 2241 for prop in props: 2242 if isinstance(prop, exp.SequenceProperties): 2243 for arg, value in prop.args.items(): 2244 if arg == "options": 2245 options.extend(value) 2246 else: 2247 sequence_props.set(arg, value) 2248 prop.pop() 2249 2250 if options: 2251 sequence_props.set("options", options) 2252 2253 props.append("expressions", sequence_props) 2254 extend_props(props) 2255 else: 2256 expression = self._parse_ddl_select() 2257 2258 # Some dialects also support using a table as an alias instead of a SELECT. 2259 # Here we fall back to this as an alternative.
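# (Illustrative: e.g. Teradata's `CREATE TABLE t2 AS t1 WITH DATA`, where `t1`
# names an existing table rather than a subquery.)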
2260 if not expression and has_alias: 2261 expression = self._try_parse(self._parse_table_parts) 2262 2263 if create_token.token_type == TokenType.TABLE: 2264 # exp.Properties.Location.POST_EXPRESSION 2265 extend_props(self._parse_properties()) 2266 2267 indexes = [] 2268 while True: 2269 index = self._parse_index() 2270 2271 # exp.Properties.Location.POST_INDEX 2272 extend_props(self._parse_properties()) 2273 if not index: 2274 break 2275 else: 2276 self._match(TokenType.COMMA) 2277 indexes.append(index) 2278 elif create_token.token_type == TokenType.VIEW: 2279 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2280 no_schema_binding = True 2281 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2282 extend_props(self._parse_properties()) 2283 2284 shallow = self._match_text_seq("SHALLOW") 2285 2286 if self._match_texts(self.CLONE_KEYWORDS): 2287 copy = self._prev.text.lower() == "copy" 2288 clone = self.expression( 2289 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2290 ) 2291 2292 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2293 return self._parse_as_command(start) 2294 2295 create_kind_text = create_token.text.upper() 2296 return self.expression( 2297 exp.Create, 2298 this=this, 2299 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2300 replace=replace, 2301 refresh=refresh, 2302 unique=unique, 2303 expression=expression, 2304 exists=exists, 2305 properties=properties, 2306 indexes=indexes, 2307 no_schema_binding=no_schema_binding, 2308 begin=begin, 2309 end=end, 2310 clone=clone, 2311 concurrently=concurrently, 2312 clustered=clustered, 2313 ) 2314 2315 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2316 seq = exp.SequenceProperties() 2317 2318 options = [] 2319 index = self._index 2320 2321 while self._curr: 2322 self._match(TokenType.COMMA) 2323 if self._match_text_seq("INCREMENT"): 2324 self._match_text_seq("BY") 2325 self._match_text_seq("=") 2326 seq.set("increment", self._parse_term()) 2327 elif self._match_text_seq("MINVALUE"): 2328 seq.set("minvalue", self._parse_term()) 2329 elif self._match_text_seq("MAXVALUE"): 2330 seq.set("maxvalue", self._parse_term()) 2331 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2332 self._match_text_seq("=") 2333 seq.set("start", self._parse_term()) 2334 elif self._match_text_seq("CACHE"): 2335 # T-SQL allows empty CACHE which is initialized dynamically 2336 seq.set("cache", self._parse_number() or True) 2337 elif self._match_text_seq("OWNED", "BY"): 2338 # "OWNED BY NONE" is the default 2339 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2340 else: 2341 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2342 if opt: 2343 options.append(opt) 2344 else: 2345 break 2346 2347 seq.set("options", options if options else None) 2348 return None if self._index == index else seq 2349 2350 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2351 # only used for teradata currently 2352 self._match(TokenType.COMMA) 2353 2354 kwargs = { 2355 "no": self._match_text_seq("NO"), 2356 "dual": self._match_text_seq("DUAL"), 2357 "before": self._match_text_seq("BEFORE"), 2358 "default": self._match_text_seq("DEFAULT"), 2359 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2360 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2361 "after": self._match_text_seq("AFTER"), 2362 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2363 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2364 } 2365 2366 if self._match_texts(self.PROPERTY_PARSERS): 2367 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2368 try: 2369 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2370 except TypeError: 2371 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2372 2373 return None 2374 2375 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2376 return self._parse_wrapped_csv(self._parse_property) 2377 2378 def _parse_property(self) -> t.Optional[exp.Expression]: 2379 if self._match_texts(self.PROPERTY_PARSERS): 2380 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2381 2382 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2383 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2384 2385 if self._match_text_seq("COMPOUND", "SORTKEY"): 2386 return self._parse_sortkey(compound=True) 2387 2388 if self._match_text_seq("SQL", "SECURITY"): 2389 return self.expression( 2390 exp.SqlSecurityProperty, 2391 this=self._match_texts(("DEFINER", "INVOKER")) and self._prev.text.upper(), 2392 ) 2393 2394 index = self._index 2395 2396 seq_props = self._parse_sequence_properties() 2397 if seq_props: 2398 return seq_props 2399 2400 self._retreat(index) 2401 key = self._parse_column() 2402 2403 if not self._match(TokenType.EQ): 2404 self._retreat(index) 2405 return None 2406 2407 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2408 if isinstance(key, exp.Column): 2409 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2410 2411 value = self._parse_bitwise() or self._parse_var(any_token=True) 2412 2413 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2414 if isinstance(value, exp.Column): 2415 value = exp.var(value.name) 2416 2417 return self.expression(exp.Property, this=key, value=value) 2418 2419 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2420 if self._match_text_seq("BY"): 2421 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2422 2423 self._match(TokenType.ALIAS) 2424 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2425 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2426 2427 return self.expression( 2428 exp.FileFormatProperty, 2429 this=( 2430 self.expression( 2431 exp.InputOutputFormat, 2432 input_format=input_format, 2433 output_format=output_format, 2434 ) 2435 if input_format or output_format 2436 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2437 ), 2438 hive_format=True, 2439 ) 2440 2441 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2442 field = self._parse_field() 2443 if isinstance(field, exp.Identifier) and not field.quoted: 2444 field = exp.var(field) 2445 2446 return field 2447 2448 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2449 self._match(TokenType.EQ) 2450 self._match(TokenType.ALIAS) 2451 2452 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2453 2454 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2455 properties = [] 2456 while True: 2457 if before: 2458 prop = self._parse_property_before() 2459 else: 2460 prop = self._parse_property() 2461 if not prop: 2462 break 2463 for 
p in ensure_list(prop): 2464 properties.append(p) 2465 2466 if properties: 2467 return self.expression(exp.Properties, expressions=properties) 2468 2469 return None 2470 2471 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2472 return self.expression( 2473 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2474 ) 2475 2476 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2477 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2478 security_specifier = self._prev.text.upper() 2479 return self.expression(exp.SecurityProperty, this=security_specifier) 2480 return None 2481 2482 def _parse_settings_property(self) -> exp.SettingsProperty: 2483 return self.expression( 2484 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2485 ) 2486 2487 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2488 if self._index >= 2: 2489 pre_volatile_token = self._tokens[self._index - 2] 2490 else: 2491 pre_volatile_token = None 2492 2493 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2494 return exp.VolatileProperty() 2495 2496 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2497 2498 def _parse_retention_period(self) -> exp.Var: 2499 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2500 number = self._parse_number() 2501 number_str = f"{number} " if number else "" 2502 unit = self._parse_var(any_token=True) 2503 return exp.var(f"{number_str}{unit}") 2504 2505 def _parse_system_versioning_property( 2506 self, with_: bool = False 2507 ) -> exp.WithSystemVersioningProperty: 2508 self._match(TokenType.EQ) 2509 prop = self.expression( 2510 exp.WithSystemVersioningProperty, 2511 on=True, 2512 with_=with_, 2513 ) 2514 2515 if self._match_text_seq("OFF"): 2516 prop.set("on", False) 2517 return prop 2518 2519 self._match(TokenType.ON) 2520 if self._match(TokenType.L_PAREN): 2521 while self._curr and not self._match(TokenType.R_PAREN): 2522 if self._match_text_seq("HISTORY_TABLE", "="): 2523 prop.set("this", self._parse_table_parts()) 2524 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2525 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2526 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2527 prop.set("retention_period", self._parse_retention_period()) 2528 2529 self._match(TokenType.COMMA) 2530 2531 return prop 2532 2533 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2534 self._match(TokenType.EQ) 2535 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2536 prop = self.expression(exp.DataDeletionProperty, on=on) 2537 2538 if self._match(TokenType.L_PAREN): 2539 while self._curr and not self._match(TokenType.R_PAREN): 2540 if self._match_text_seq("FILTER_COLUMN", "="): 2541 prop.set("filter_column", self._parse_column()) 2542 elif self._match_text_seq("RETENTION_PERIOD", "="): 2543 prop.set("retention_period", self._parse_retention_period()) 2544 2545 self._match(TokenType.COMMA) 2546 2547 return prop 2548 2549 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2550 kind = "HASH" 2551 expressions: t.Optional[t.List[exp.Expression]] = None 2552 if self._match_text_seq("BY", "HASH"): 2553 expressions = self._parse_wrapped_csv(self._parse_id_var) 2554 elif self._match_text_seq("BY", "RANDOM"): 2555 kind = "RANDOM" 2556 2557 # If the BUCKETS keyword is not present, the number of buckets is 
AUTO 2558 buckets: t.Optional[exp.Expression] = None 2559 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2560 buckets = self._parse_number() 2561 2562 return self.expression( 2563 exp.DistributedByProperty, 2564 expressions=expressions, 2565 kind=kind, 2566 buckets=buckets, 2567 order=self._parse_order(), 2568 ) 2569 2570 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2571 self._match_text_seq("KEY") 2572 expressions = self._parse_wrapped_id_vars() 2573 return self.expression(expr_type, expressions=expressions) 2574 2575 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2576 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2577 prop = self._parse_system_versioning_property(with_=True) 2578 self._match_r_paren() 2579 return prop 2580 2581 if self._match(TokenType.L_PAREN, advance=False): 2582 return self._parse_wrapped_properties() 2583 2584 if self._match_text_seq("JOURNAL"): 2585 return self._parse_withjournaltable() 2586 2587 if self._match_texts(self.VIEW_ATTRIBUTES): 2588 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2589 2590 if self._match_text_seq("DATA"): 2591 return self._parse_withdata(no=False) 2592 elif self._match_text_seq("NO", "DATA"): 2593 return self._parse_withdata(no=True) 2594 2595 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2596 return self._parse_serde_properties(with_=True) 2597 2598 if self._match(TokenType.SCHEMA): 2599 return self.expression( 2600 exp.WithSchemaBindingProperty, 2601 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2602 ) 2603 2604 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2605 return self.expression( 2606 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2607 ) 2608 2609 if not self._next: 2610 return None 2611 2612 return self._parse_withisolatedloading() 2613 2614 def _parse_procedure_option(self) -> exp.Expression | None: 2615 if self._match_text_seq("EXECUTE", "AS"): 2616 return self.expression( 2617 exp.ExecuteAsProperty, 2618 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2619 or self._parse_string(), 2620 ) 2621 2622 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2623 2624 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2625 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2626 self._match(TokenType.EQ) 2627 2628 user = self._parse_id_var() 2629 self._match(TokenType.PARAMETER) 2630 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2631 2632 if not user or not host: 2633 return None 2634 2635 return exp.DefinerProperty(this=f"{user}@{host}") 2636 2637 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2638 self._match(TokenType.TABLE) 2639 self._match(TokenType.EQ) 2640 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2641 2642 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2643 return self.expression(exp.LogProperty, no=no) 2644 2645 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2646 return self.expression(exp.JournalProperty, **kwargs) 2647 2648 def _parse_checksum(self) -> exp.ChecksumProperty: 2649 self._match(TokenType.EQ) 2650 2651 on = None 2652 if self._match(TokenType.ON): 2653 on = True 2654 elif self._match_text_seq("OFF"): 2655 on = False 2656 2657 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2658 2659 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2660 return self.expression( 2661 exp.Cluster, 2662 expressions=( 2663 self._parse_wrapped_csv(self._parse_ordered) 2664 if wrapped 2665 else self._parse_csv(self._parse_ordered) 2666 ), 2667 ) 2668 2669 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2670 self._match_text_seq("BY") 2671 2672 self._match_l_paren() 2673 expressions = self._parse_csv(self._parse_column) 2674 self._match_r_paren() 2675 2676 if self._match_text_seq("SORTED", "BY"): 2677 self._match_l_paren() 2678 sorted_by = self._parse_csv(self._parse_ordered) 2679 self._match_r_paren() 2680 else: 2681 sorted_by = None 2682 2683 self._match(TokenType.INTO) 2684 buckets = self._parse_number() 2685 self._match_text_seq("BUCKETS") 2686 2687 return self.expression( 2688 exp.ClusteredByProperty, 2689 expressions=expressions, 2690 sorted_by=sorted_by, 2691 buckets=buckets, 2692 ) 2693 2694 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2695 if not self._match_text_seq("GRANTS"): 2696 self._retreat(self._index - 1) 2697 return None 2698 2699 return self.expression(exp.CopyGrantsProperty) 2700 2701 def _parse_freespace(self) -> exp.FreespaceProperty: 2702 self._match(TokenType.EQ) 2703 return self.expression( 2704 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2705 ) 2706 2707 def _parse_mergeblockratio( 2708 self, no: bool = False, default: bool = False 2709 ) -> exp.MergeBlockRatioProperty: 2710 if self._match(TokenType.EQ): 2711 return self.expression( 2712 exp.MergeBlockRatioProperty, 2713 this=self._parse_number(), 2714 percent=self._match(TokenType.PERCENT), 2715 ) 2716 2717 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2718 2719 def _parse_datablocksize( 2720 self, 2721 default: t.Optional[bool] = None, 2722 minimum: t.Optional[bool] = None, 2723 maximum: t.Optional[bool] = None, 2724 ) -> exp.DataBlocksizeProperty: 2725 self._match(TokenType.EQ) 2726 size = self._parse_number() 2727 2728 units = None 2729 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2730 units = self._prev.text 2731 2732 return self.expression( 2733 exp.DataBlocksizeProperty, 2734 size=size, 2735 units=units, 2736 default=default, 2737 minimum=minimum, 2738 maximum=maximum, 2739 ) 2740 2741 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2742 self._match(TokenType.EQ) 2743 always = self._match_text_seq("ALWAYS") 2744 manual = self._match_text_seq("MANUAL") 2745 never = self._match_text_seq("NEVER") 2746 default = self._match_text_seq("DEFAULT") 2747 2748 autotemp = None 2749 if self._match_text_seq("AUTOTEMP"): 2750 autotemp = self._parse_schema() 2751 2752 return self.expression( 2753 exp.BlockCompressionProperty, 2754 always=always, 2755 manual=manual, 2756 never=never, 2757 default=default, 2758 autotemp=autotemp, 2759 ) 2760 2761 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2762 index = self._index 2763 no = self._match_text_seq("NO") 2764 concurrent = self._match_text_seq("CONCURRENT") 2765 2766 if not self._match_text_seq("ISOLATED", "LOADING"): 2767 self._retreat(index) 2768 return None 2769 2770 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2771 return self.expression( 2772 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2773 ) 2774 2775 def _parse_locking(self) -> exp.LockingProperty: 2776 if self._match(TokenType.TABLE): 2777 kind = "TABLE" 2778 elif 
self._match(TokenType.VIEW): 2779 kind = "VIEW" 2780 elif self._match(TokenType.ROW): 2781 kind = "ROW" 2782 elif self._match_text_seq("DATABASE"): 2783 kind = "DATABASE" 2784 else: 2785 kind = None 2786 2787 if kind in ("DATABASE", "TABLE", "VIEW"): 2788 this = self._parse_table_parts() 2789 else: 2790 this = None 2791 2792 if self._match(TokenType.FOR): 2793 for_or_in = "FOR" 2794 elif self._match(TokenType.IN): 2795 for_or_in = "IN" 2796 else: 2797 for_or_in = None 2798 2799 if self._match_text_seq("ACCESS"): 2800 lock_type = "ACCESS" 2801 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2802 lock_type = "EXCLUSIVE" 2803 elif self._match_text_seq("SHARE"): 2804 lock_type = "SHARE" 2805 elif self._match_text_seq("READ"): 2806 lock_type = "READ" 2807 elif self._match_text_seq("WRITE"): 2808 lock_type = "WRITE" 2809 elif self._match_text_seq("CHECKSUM"): 2810 lock_type = "CHECKSUM" 2811 else: 2812 lock_type = None 2813 2814 override = self._match_text_seq("OVERRIDE") 2815 2816 return self.expression( 2817 exp.LockingProperty, 2818 this=this, 2819 kind=kind, 2820 for_or_in=for_or_in, 2821 lock_type=lock_type, 2822 override=override, 2823 ) 2824 2825 def _parse_partition_by(self) -> t.List[exp.Expression]: 2826 if self._match(TokenType.PARTITION_BY): 2827 return self._parse_csv(self._parse_disjunction) 2828 return [] 2829 2830 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2831 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2832 if self._match_text_seq("MINVALUE"): 2833 return exp.var("MINVALUE") 2834 if self._match_text_seq("MAXVALUE"): 2835 return exp.var("MAXVALUE") 2836 return self._parse_bitwise() 2837 2838 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2839 expression = None 2840 from_expressions = None 2841 to_expressions = None 2842 2843 if self._match(TokenType.IN): 2844 this = self._parse_wrapped_csv(self._parse_bitwise) 2845 elif self._match(TokenType.FROM): 2846 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2847 self._match_text_seq("TO") 2848 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2849 elif self._match_text_seq("WITH", "(", "MODULUS"): 2850 this = self._parse_number() 2851 self._match_text_seq(",", "REMAINDER") 2852 expression = self._parse_number() 2853 self._match_r_paren() 2854 else: 2855 self.raise_error("Failed to parse partition bound spec.") 2856 2857 return self.expression( 2858 exp.PartitionBoundSpec, 2859 this=this, 2860 expression=expression, 2861 from_expressions=from_expressions, 2862 to_expressions=to_expressions, 2863 ) 2864 2865 # https://www.postgresql.org/docs/current/sql-createtable.html 2866 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2867 if not self._match_text_seq("OF"): 2868 self._retreat(self._index - 1) 2869 return None 2870 2871 this = self._parse_table(schema=True) 2872 2873 if self._match(TokenType.DEFAULT): 2874 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2875 elif self._match_text_seq("FOR", "VALUES"): 2876 expression = self._parse_partition_bound_spec() 2877 else: 2878 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2879 2880 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2881 2882 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2883 self._match(TokenType.EQ) 2884 return self.expression( 2885 exp.PartitionedByProperty, 2886 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2887 ) 2888 2889 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2890 if self._match_text_seq("AND", "STATISTICS"): 2891 statistics = True 2892 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2893 statistics = False 2894 else: 2895 statistics = None 2896 2897 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2898 2899 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2900 if self._match_text_seq("SQL"): 2901 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2902 return None 2903 2904 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2905 if self._match_text_seq("SQL", "DATA"): 2906 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2907 return None 2908 2909 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2910 if self._match_text_seq("PRIMARY", "INDEX"): 2911 return exp.NoPrimaryIndexProperty() 2912 if self._match_text_seq("SQL"): 2913 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2914 return None 2915 2916 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2917 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2918 return exp.OnCommitProperty() 2919 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2920 return exp.OnCommitProperty(delete=True) 2921 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2922 2923 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2924 if self._match_text_seq("SQL", "DATA"): 2925 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2926 return None 2927 2928 def _parse_distkey(self) -> exp.DistKeyProperty: 2929 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2930 2931 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2932 table = self._parse_table(schema=True) 2933 2934 options = [] 2935 while self._match_texts(("INCLUDING", "EXCLUDING")): 2936 this = self._prev.text.upper() 2937 2938 id_var = self._parse_id_var() 2939 if not id_var: 2940 return None 2941 2942 options.append( 2943 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2944 ) 2945 2946 return self.expression(exp.LikeProperty, this=table, expressions=options) 2947 2948 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2949 return self.expression( 2950 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2951 ) 2952 2953 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2954 self._match(TokenType.EQ) 2955 return self.expression( 2956 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2957 ) 2958 2959 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2960 self._match_text_seq("WITH", "CONNECTION") 2961 return self.expression( 2962 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2963 ) 2964 2965 def _parse_returns(self) -> exp.ReturnsProperty: 2966 value: t.Optional[exp.Expression] 2967 null = None 2968 is_table = self._match(TokenType.TABLE) 2969 2970 if is_table: 2971 if self._match(TokenType.LT): 2972 value = self.expression( 2973 exp.Schema, 2974 this="TABLE", 2975 expressions=self._parse_csv(self._parse_struct_types), 2976 ) 2977 if not self._match(TokenType.GT): 2978 self.raise_error("Expecting >") 2979 else: 2980 value = self._parse_schema(exp.var("TABLE")) 2981 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
2982 null = True 2983 value = None 2984 else: 2985 value = self._parse_types() 2986 2987 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2988 2989 def _parse_describe(self) -> exp.Describe: 2990 kind = self._match_set(self.CREATABLES) and self._prev.text 2991 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2992 if self._match(TokenType.DOT): 2993 style = None 2994 self._retreat(self._index - 2) 2995 2996 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2997 2998 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2999 this = self._parse_statement() 3000 else: 3001 this = self._parse_table(schema=True) 3002 3003 properties = self._parse_properties() 3004 expressions = properties.expressions if properties else None 3005 partition = self._parse_partition() 3006 return self.expression( 3007 exp.Describe, 3008 this=this, 3009 style=style, 3010 kind=kind, 3011 expressions=expressions, 3012 partition=partition, 3013 format=format, 3014 as_json=self._match_text_seq("AS", "JSON"), 3015 ) 3016 3017 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 3018 kind = self._prev.text.upper() 3019 expressions = [] 3020 3021 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 3022 if self._match(TokenType.WHEN): 3023 expression = self._parse_disjunction() 3024 self._match(TokenType.THEN) 3025 else: 3026 expression = None 3027 3028 else_ = self._match(TokenType.ELSE) 3029 3030 if not self._match(TokenType.INTO): 3031 return None 3032 3033 return self.expression( 3034 exp.ConditionalInsert, 3035 this=self.expression( 3036 exp.Insert, 3037 this=self._parse_table(schema=True), 3038 expression=self._parse_derived_table_values(), 3039 ), 3040 expression=expression, 3041 else_=else_, 3042 ) 3043 3044 expression = parse_conditional_insert() 3045 while expression is not None: 3046 expressions.append(expression) 3047 expression = parse_conditional_insert() 3048 3049 return self.expression( 3050 exp.MultitableInserts, 3051 kind=kind, 3052 comments=comments, 3053 expressions=expressions, 3054 source=self._parse_table(), 3055 ) 3056 3057 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 3058 comments = [] 3059 hint = self._parse_hint() 3060 overwrite = self._match(TokenType.OVERWRITE) 3061 ignore = self._match(TokenType.IGNORE) 3062 local = self._match_text_seq("LOCAL") 3063 alternative = None 3064 is_function = None 3065 3066 if self._match_text_seq("DIRECTORY"): 3067 this: t.Optional[exp.Expression] = self.expression( 3068 exp.Directory, 3069 this=self._parse_var_or_string(), 3070 local=local, 3071 row_format=self._parse_row_format(match_row=True), 3072 ) 3073 else: 3074 if self._match_set((TokenType.FIRST, TokenType.ALL)): 3075 comments += ensure_list(self._prev_comments) 3076 return self._parse_multitable_inserts(comments) 3077 3078 if self._match(TokenType.OR): 3079 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 3080 3081 self._match(TokenType.INTO) 3082 comments += ensure_list(self._prev_comments) 3083 self._match(TokenType.TABLE) 3084 is_function = self._match(TokenType.FUNCTION) 3085 3086 this = self._parse_function() if is_function else self._parse_insert_table() 3087 3088 returning = self._parse_returning() # TSQL allows RETURNING before source 3089 3090 return self.expression( 3091 exp.Insert, 3092 comments=comments, 3093 hint=hint, 3094 is_function=is_function, 3095 this=this, 3096 
stored=self._match_text_seq("STORED") and self._parse_stored(), 3097 by_name=self._match_text_seq("BY", "NAME"), 3098 exists=self._parse_exists(), 3099 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 3100 and self._parse_disjunction(), 3101 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 3102 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 3103 default=self._match_text_seq("DEFAULT", "VALUES"), 3104 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 3105 conflict=self._parse_on_conflict(), 3106 returning=returning or self._parse_returning(), 3107 overwrite=overwrite, 3108 alternative=alternative, 3109 ignore=ignore, 3110 source=self._match(TokenType.TABLE) and self._parse_table(), 3111 ) 3112 3113 def _parse_insert_table(self) -> t.Optional[exp.Expression]: 3114 this = self._parse_table(schema=True, parse_partition=True) 3115 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 3116 this.set("alias", self._parse_table_alias()) 3117 return this 3118 3119 def _parse_kill(self) -> exp.Kill: 3120 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 3121 3122 return self.expression( 3123 exp.Kill, 3124 this=self._parse_primary(), 3125 kind=kind, 3126 ) 3127 3128 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 3129 conflict = self._match_text_seq("ON", "CONFLICT") 3130 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 3131 3132 if not conflict and not duplicate: 3133 return None 3134 3135 conflict_keys = None 3136 constraint = None 3137 3138 if conflict: 3139 if self._match_text_seq("ON", "CONSTRAINT"): 3140 constraint = self._parse_id_var() 3141 elif self._match(TokenType.L_PAREN): 3142 conflict_keys = self._parse_csv(self._parse_id_var) 3143 self._match_r_paren() 3144 3145 index_predicate = self._parse_where() 3146 3147 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3148 if self._prev.token_type == TokenType.UPDATE: 3149 self._match(TokenType.SET) 3150 expressions = self._parse_csv(self._parse_equality) 3151 else: 3152 expressions = None 3153 3154 return self.expression( 3155 exp.OnConflict, 3156 duplicate=duplicate, 3157 expressions=expressions, 3158 action=action, 3159 conflict_keys=conflict_keys, 3160 index_predicate=index_predicate, 3161 constraint=constraint, 3162 where=self._parse_where(), 3163 ) 3164 3165 def _parse_returning(self) -> t.Optional[exp.Returning]: 3166 if not self._match(TokenType.RETURNING): 3167 return None 3168 return self.expression( 3169 exp.Returning, 3170 expressions=self._parse_csv(self._parse_expression), 3171 into=self._match(TokenType.INTO) and self._parse_table_part(), 3172 ) 3173 3174 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3175 if not self._match(TokenType.FORMAT): 3176 return None 3177 return self._parse_row_format() 3178 3179 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3180 index = self._index 3181 with_ = with_ or self._match_text_seq("WITH") 3182 3183 if not self._match(TokenType.SERDE_PROPERTIES): 3184 self._retreat(index) 3185 return None 3186 return self.expression( 3187 exp.SerdeProperties, 3188 expressions=self._parse_wrapped_properties(), 3189 with_=with_, 3190 ) 3191 3192 def _parse_row_format( 3193 self, match_row: bool = False 3194 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3195 if match_row and not 
self._match_pair(TokenType.ROW, TokenType.FORMAT): 3196 return None 3197 3198 if self._match_text_seq("SERDE"): 3199 this = self._parse_string() 3200 3201 serde_properties = self._parse_serde_properties() 3202 3203 return self.expression( 3204 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3205 ) 3206 3207 self._match_text_seq("DELIMITED") 3208 3209 kwargs = {} 3210 3211 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3212 kwargs["fields"] = self._parse_string() 3213 if self._match_text_seq("ESCAPED", "BY"): 3214 kwargs["escaped"] = self._parse_string() 3215 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3216 kwargs["collection_items"] = self._parse_string() 3217 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3218 kwargs["map_keys"] = self._parse_string() 3219 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3220 kwargs["lines"] = self._parse_string() 3221 if self._match_text_seq("NULL", "DEFINED", "AS"): 3222 kwargs["null"] = self._parse_string() 3223 3224 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3225 3226 def _parse_load(self) -> exp.LoadData | exp.Command: 3227 if self._match_text_seq("DATA"): 3228 local = self._match_text_seq("LOCAL") 3229 self._match_text_seq("INPATH") 3230 inpath = self._parse_string() 3231 overwrite = self._match(TokenType.OVERWRITE) 3232 self._match_pair(TokenType.INTO, TokenType.TABLE) 3233 3234 return self.expression( 3235 exp.LoadData, 3236 this=self._parse_table(schema=True), 3237 local=local, 3238 overwrite=overwrite, 3239 inpath=inpath, 3240 partition=self._parse_partition(), 3241 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3242 serde=self._match_text_seq("SERDE") and self._parse_string(), 3243 ) 3244 return self._parse_as_command(self._prev) 3245 3246 def _parse_delete(self) -> exp.Delete: 3247 # This handles MySQL's "Multiple-Table Syntax" 3248 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3249 tables = None 3250 if not self._match(TokenType.FROM, advance=False): 3251 tables = self._parse_csv(self._parse_table) or None 3252 3253 returning = self._parse_returning() 3254 3255 return self.expression( 3256 exp.Delete, 3257 tables=tables, 3258 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3259 using=self._match(TokenType.USING) 3260 and self._parse_csv(lambda: self._parse_table(joins=True)), 3261 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3262 where=self._parse_where(), 3263 returning=returning or self._parse_returning(), 3264 order=self._parse_order(), 3265 limit=self._parse_limit(), 3266 ) 3267 3268 def _parse_update(self) -> exp.Update: 3269 kwargs: t.Dict[str, t.Any] = { 3270 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3271 } 3272 while self._curr: 3273 if self._match(TokenType.SET): 3274 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3275 elif self._match(TokenType.RETURNING, advance=False): 3276 kwargs["returning"] = self._parse_returning() 3277 elif self._match(TokenType.FROM, advance=False): 3278 from_ = self._parse_from(joins=True) 3279 table = from_.this if from_ else None 3280 if isinstance(table, exp.Subquery) and self._match(TokenType.JOIN, advance=False): 3281 table.set("joins", list(self._parse_joins()) or None) 3282 3283 kwargs["from_"] = from_ 3284 elif self._match(TokenType.WHERE, advance=False): 3285 kwargs["where"] = self._parse_where() 3286 elif self._match(TokenType.ORDER_BY, advance=False): 3287 
kwargs["order"] = self._parse_order() 3288 elif self._match(TokenType.LIMIT, advance=False): 3289 kwargs["limit"] = self._parse_limit() 3290 else: 3291 break 3292 3293 return self.expression(exp.Update, **kwargs) 3294 3295 def _parse_use(self) -> exp.Use: 3296 return self.expression( 3297 exp.Use, 3298 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3299 this=self._parse_table(schema=False), 3300 ) 3301 3302 def _parse_uncache(self) -> exp.Uncache: 3303 if not self._match(TokenType.TABLE): 3304 self.raise_error("Expecting TABLE after UNCACHE") 3305 3306 return self.expression( 3307 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3308 ) 3309 3310 def _parse_cache(self) -> exp.Cache: 3311 lazy = self._match_text_seq("LAZY") 3312 self._match(TokenType.TABLE) 3313 table = self._parse_table(schema=True) 3314 3315 options = [] 3316 if self._match_text_seq("OPTIONS"): 3317 self._match_l_paren() 3318 k = self._parse_string() 3319 self._match(TokenType.EQ) 3320 v = self._parse_string() 3321 options = [k, v] 3322 self._match_r_paren() 3323 3324 self._match(TokenType.ALIAS) 3325 return self.expression( 3326 exp.Cache, 3327 this=table, 3328 lazy=lazy, 3329 options=options, 3330 expression=self._parse_select(nested=True), 3331 ) 3332 3333 def _parse_partition(self) -> t.Optional[exp.Partition]: 3334 if not self._match_texts(self.PARTITION_KEYWORDS): 3335 return None 3336 3337 return self.expression( 3338 exp.Partition, 3339 subpartition=self._prev.text.upper() == "SUBPARTITION", 3340 expressions=self._parse_wrapped_csv(self._parse_disjunction), 3341 ) 3342 3343 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3344 def _parse_value_expression() -> t.Optional[exp.Expression]: 3345 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3346 return exp.var(self._prev.text.upper()) 3347 return self._parse_expression() 3348 3349 if self._match(TokenType.L_PAREN): 3350 expressions = self._parse_csv(_parse_value_expression) 3351 self._match_r_paren() 3352 return self.expression(exp.Tuple, expressions=expressions) 3353 3354 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3355 expression = self._parse_expression() 3356 if expression: 3357 return self.expression(exp.Tuple, expressions=[expression]) 3358 return None 3359 3360 def _parse_projections(self) -> t.List[exp.Expression]: 3361 return self._parse_expressions() 3362 3363 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3364 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3365 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3366 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3367 ) 3368 elif self._match(TokenType.FROM): 3369 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3370 # Support parentheses for duckdb FROM-first syntax 3371 select = self._parse_select(from_=from_) 3372 if select: 3373 if not select.args.get("from_"): 3374 select.set("from_", from_) 3375 this = select 3376 else: 3377 this = exp.select("*").from_(t.cast(exp.From, from_)) 3378 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3379 else: 3380 this = ( 3381 self._parse_table(consume_pipe=True) 3382 if table 3383 else self._parse_select(nested=True, parse_set_operation=False) 3384 ) 3385 3386 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3387 # in case a modifier (e.g. 
join) is following 3388 if table and isinstance(this, exp.Values) and this.alias: 3389 alias = this.args["alias"].pop() 3390 this = exp.Table(this=this, alias=alias) 3391 3392 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3393 3394 return this 3395 3396 def _parse_select( 3397 self, 3398 nested: bool = False, 3399 table: bool = False, 3400 parse_subquery_alias: bool = True, 3401 parse_set_operation: bool = True, 3402 consume_pipe: bool = True, 3403 from_: t.Optional[exp.From] = None, 3404 ) -> t.Optional[exp.Expression]: 3405 query = self._parse_select_query( 3406 nested=nested, 3407 table=table, 3408 parse_subquery_alias=parse_subquery_alias, 3409 parse_set_operation=parse_set_operation, 3410 ) 3411 3412 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3413 if not query and from_: 3414 query = exp.select("*").from_(from_) 3415 if isinstance(query, exp.Query): 3416 query = self._parse_pipe_syntax_query(query) 3417 query = query.subquery(copy=False) if query and table else query 3418 3419 return query 3420 3421 def _parse_select_query( 3422 self, 3423 nested: bool = False, 3424 table: bool = False, 3425 parse_subquery_alias: bool = True, 3426 parse_set_operation: bool = True, 3427 ) -> t.Optional[exp.Expression]: 3428 cte = self._parse_with() 3429 3430 if cte: 3431 this = self._parse_statement() 3432 3433 if not this: 3434 self.raise_error("Failed to parse any statement following CTE") 3435 return cte 3436 3437 while isinstance(this, exp.Subquery) and this.is_wrapper: 3438 this = this.this 3439 3440 if "with_" in this.arg_types: 3441 this.set("with_", cte) 3442 else: 3443 self.raise_error(f"{this.key} does not support CTE") 3444 this = cte 3445 3446 return this 3447 3448 # duckdb supports leading with FROM x 3449 from_ = ( 3450 self._parse_from(joins=True, consume_pipe=True) 3451 if self._match(TokenType.FROM, advance=False) 3452 else None 3453 ) 3454 3455 if self._match(TokenType.SELECT): 3456 comments = self._prev_comments 3457 3458 hint = self._parse_hint() 3459 3460 if self._next and not self._next.token_type == TokenType.DOT: 3461 all_ = self._match(TokenType.ALL) 3462 distinct = self._match_set(self.DISTINCT_TOKENS) 3463 else: 3464 all_, distinct = None, None 3465 3466 kind = ( 3467 self._match(TokenType.ALIAS) 3468 and self._match_texts(("STRUCT", "VALUE")) 3469 and self._prev.text.upper() 3470 ) 3471 3472 if distinct: 3473 distinct = self.expression( 3474 exp.Distinct, 3475 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3476 ) 3477 3478 if all_ and distinct: 3479 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3480 3481 operation_modifiers = [] 3482 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3483 operation_modifiers.append(exp.var(self._prev.text.upper())) 3484 3485 limit = self._parse_limit(top=True) 3486 projections = self._parse_projections() 3487 3488 this = self.expression( 3489 exp.Select, 3490 kind=kind, 3491 hint=hint, 3492 distinct=distinct, 3493 expressions=projections, 3494 limit=limit, 3495 operation_modifiers=operation_modifiers or None, 3496 ) 3497 this.comments = comments 3498 3499 into = self._parse_into() 3500 if into: 3501 this.set("into", into) 3502 3503 if not from_: 3504 from_ = self._parse_from() 3505 3506 if from_: 3507 this.set("from_", from_) 3508 3509 this = self._parse_query_modifiers(this) 3510 elif (table or nested) and self._match(TokenType.L_PAREN): 3511 this = self._parse_wrapped_select(table=table) 3512 3513 # We return early here so that 
the UNION isn't attached to the subquery by the 3514 # following call to _parse_set_operations, but instead becomes the parent node 3515 self._match_r_paren() 3516 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3517 elif self._match(TokenType.VALUES, advance=False): 3518 this = self._parse_derived_table_values() 3519 elif from_: 3520 this = exp.select("*").from_(from_.this, copy=False) 3521 elif self._match(TokenType.SUMMARIZE): 3522 table = self._match(TokenType.TABLE) 3523 this = self._parse_select() or self._parse_string() or self._parse_table() 3524 return self.expression(exp.Summarize, this=this, table=table) 3525 elif self._match(TokenType.DESCRIBE): 3526 this = self._parse_describe() 3527 else: 3528 this = None 3529 3530 return self._parse_set_operations(this) if parse_set_operation else this 3531 3532 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3533 self._match_text_seq("SEARCH") 3534 3535 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3536 3537 if not kind: 3538 return None 3539 3540 self._match_text_seq("FIRST", "BY") 3541 3542 return self.expression( 3543 exp.RecursiveWithSearch, 3544 kind=kind, 3545 this=self._parse_id_var(), 3546 expression=self._match_text_seq("SET") and self._parse_id_var(), 3547 using=self._match_text_seq("USING") and self._parse_id_var(), 3548 ) 3549 3550 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3551 if not skip_with_token and not self._match(TokenType.WITH): 3552 return None 3553 3554 comments = self._prev_comments 3555 recursive = self._match(TokenType.RECURSIVE) 3556 3557 last_comments = None 3558 expressions = [] 3559 while True: 3560 cte = self._parse_cte() 3561 if isinstance(cte, exp.CTE): 3562 expressions.append(cte) 3563 if last_comments: 3564 cte.add_comments(last_comments) 3565 3566 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3567 break 3568 else: 3569 self._match(TokenType.WITH) 3570 3571 last_comments = self._prev_comments 3572 3573 return self.expression( 3574 exp.With, 3575 comments=comments, 3576 expressions=expressions, 3577 recursive=recursive, 3578 search=self._parse_recursive_with_search(), 3579 ) 3580 3581 def _parse_cte(self) -> t.Optional[exp.CTE]: 3582 index = self._index 3583 3584 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3585 if not alias or not alias.this: 3586 self.raise_error("Expected CTE to have alias") 3587 3588 key_expressions = ( 3589 self._parse_wrapped_id_vars() if self._match_text_seq("USING", "KEY") else None 3590 ) 3591 3592 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3593 self._retreat(index) 3594 return None 3595 3596 comments = self._prev_comments 3597 3598 if self._match_text_seq("NOT", "MATERIALIZED"): 3599 materialized = False 3600 elif self._match_text_seq("MATERIALIZED"): 3601 materialized = True 3602 else: 3603 materialized = None 3604 3605 cte = self.expression( 3606 exp.CTE, 3607 this=self._parse_wrapped(self._parse_statement), 3608 alias=alias, 3609 materialized=materialized, 3610 key_expressions=key_expressions, 3611 comments=comments, 3612 ) 3613 3614 values = cte.this 3615 if isinstance(values, exp.Values): 3616 if values.alias: 3617 cte.set("this", exp.select("*").from_(values)) 3618 else: 3619 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3620 3621 return cte 3622 3623 def _parse_table_alias( 3624 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3625 ) -> 
t.Optional[exp.TableAlias]: 3626 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3627 # so this section tries to parse the clause version and if it fails, it treats the token 3628 # as an identifier (alias) 3629 if self._can_parse_limit_or_offset(): 3630 return None 3631 3632 any_token = self._match(TokenType.ALIAS) 3633 alias = ( 3634 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3635 or self._parse_string_as_identifier() 3636 ) 3637 3638 index = self._index 3639 if self._match(TokenType.L_PAREN): 3640 columns = self._parse_csv(self._parse_function_parameter) 3641 self._match_r_paren() if columns else self._retreat(index) 3642 else: 3643 columns = None 3644 3645 if not alias and not columns: 3646 return None 3647 3648 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3649 3650 # We bubble up comments from the Identifier to the TableAlias 3651 if isinstance(alias, exp.Identifier): 3652 table_alias.add_comments(alias.pop_comments()) 3653 3654 return table_alias 3655 3656 def _parse_subquery( 3657 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3658 ) -> t.Optional[exp.Subquery]: 3659 if not this: 3660 return None 3661 3662 return self.expression( 3663 exp.Subquery, 3664 this=this, 3665 pivots=self._parse_pivots(), 3666 alias=self._parse_table_alias() if parse_alias else None, 3667 sample=self._parse_table_sample(), 3668 ) 3669 3670 def _implicit_unnests_to_explicit(self, this: E) -> E: 3671 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3672 3673 refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name} 3674 for i, join in enumerate(this.args.get("joins") or []): 3675 table = join.this 3676 normalized_table = table.copy() 3677 normalized_table.meta["maybe_column"] = True 3678 normalized_table = _norm(normalized_table, dialect=self.dialect) 3679 3680 if isinstance(table, exp.Table) and not join.args.get("on"): 3681 if normalized_table.parts[0].name in refs: 3682 table_as_column = table.to_column() 3683 unnest = exp.Unnest(expressions=[table_as_column]) 3684 3685 # Table.to_column creates a parent Alias node that we want to convert to 3686 # a TableAlias and attach to the Unnest, so it matches the parser's output 3687 if isinstance(table.args.get("alias"), exp.TableAlias): 3688 table_as_column.replace(table_as_column.this) 3689 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3690 3691 table.replace(unnest) 3692 3693 refs.add(normalized_table.alias_or_name) 3694 3695 return this 3696 3697 @t.overload 3698 def _parse_query_modifiers(self, this: E) -> E: ... 3699 3700 @t.overload 3701 def _parse_query_modifiers(self, this: None) -> None: ... 
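    # A minimal illustrative sketch, not part of the module itself (assumes only the
    # public sqlglot API): the overloads above only refine typing, while the
    # implementation below walks QUERY_MODIFIER_PARSERS and attaches each trailing
    # clause (JOIN, WHERE, GROUP BY, ORDER BY, LIMIT, ...) to the node parsed so far,
    # raising on duplicate clauses. Observable behavior:
    #
    #     import sqlglot
    #     ast = sqlglot.parse_one("SELECT a FROM t WHERE a > 1 ORDER BY a LIMIT 3")
    #     assert ast.args["where"] is not None and ast.args["order"] is not None
    #     assert ast.args["limit"] is not None  # a second LIMIT clause would raise ParseError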
3702 3703 def _parse_query_modifiers(self, this): 3704 if isinstance(this, self.MODIFIABLES): 3705 for join in self._parse_joins(): 3706 this.append("joins", join) 3707 for lateral in iter(self._parse_lateral, None): 3708 this.append("laterals", lateral) 3709 3710 while True: 3711 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3712 modifier_token = self._curr 3713 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3714 key, expression = parser(self) 3715 3716 if expression: 3717 if this.args.get(key): 3718 self.raise_error( 3719 f"Found multiple '{modifier_token.text.upper()}' clauses", 3720 token=modifier_token, 3721 ) 3722 3723 this.set(key, expression) 3724 if key == "limit": 3725 offset = expression.args.get("offset") 3726 expression.set("offset", None) 3727 3728 if offset: 3729 offset = exp.Offset(expression=offset) 3730 this.set("offset", offset) 3731 3732 limit_by_expressions = expression.expressions 3733 expression.set("expressions", None) 3734 offset.set("expressions", limit_by_expressions) 3735 continue 3736 break 3737 3738 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"): 3739 this = self._implicit_unnests_to_explicit(this) 3740 3741 return this 3742 3743 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3744 start = self._curr 3745 while self._curr: 3746 self._advance() 3747 3748 end = self._tokens[self._index - 1] 3749 return exp.Hint(expressions=[self._find_sql(start, end)]) 3750 3751 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3752 return self._parse_function_call() 3753 3754 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3755 start_index = self._index 3756 should_fallback_to_string = False 3757 3758 hints = [] 3759 try: 3760 for hint in iter( 3761 lambda: self._parse_csv( 3762 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3763 ), 3764 [], 3765 ): 3766 hints.extend(hint) 3767 except ParseError: 3768 should_fallback_to_string = True 3769 3770 if should_fallback_to_string or self._curr: 3771 self._retreat(start_index) 3772 return self._parse_hint_fallback_to_string() 3773 3774 return self.expression(exp.Hint, expressions=hints) 3775 3776 def _parse_hint(self) -> t.Optional[exp.Hint]: 3777 if self._match(TokenType.HINT) and self._prev_comments: 3778 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3779 3780 return None 3781 3782 def _parse_into(self) -> t.Optional[exp.Into]: 3783 if not self._match(TokenType.INTO): 3784 return None 3785 3786 temp = self._match(TokenType.TEMPORARY) 3787 unlogged = self._match_text_seq("UNLOGGED") 3788 self._match(TokenType.TABLE) 3789 3790 return self.expression( 3791 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3792 ) 3793 3794 def _parse_from( 3795 self, 3796 joins: bool = False, 3797 skip_from_token: bool = False, 3798 consume_pipe: bool = False, 3799 ) -> t.Optional[exp.From]: 3800 if not skip_from_token and not self._match(TokenType.FROM): 3801 return None 3802 3803 return self.expression( 3804 exp.From, 3805 comments=self._prev_comments, 3806 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3807 ) 3808 3809 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3810 return self.expression( 3811 exp.MatchRecognizeMeasure, 3812 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3813 this=self._parse_expression(), 3814 ) 3815 3816 def _parse_match_recognize(self) -> 
t.Optional[exp.MatchRecognize]: 3817 if not self._match(TokenType.MATCH_RECOGNIZE): 3818 return None 3819 3820 self._match_l_paren() 3821 3822 partition = self._parse_partition_by() 3823 order = self._parse_order() 3824 3825 measures = ( 3826 self._parse_csv(self._parse_match_recognize_measure) 3827 if self._match_text_seq("MEASURES") 3828 else None 3829 ) 3830 3831 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3832 rows = exp.var("ONE ROW PER MATCH") 3833 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3834 text = "ALL ROWS PER MATCH" 3835 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3836 text += " SHOW EMPTY MATCHES" 3837 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3838 text += " OMIT EMPTY MATCHES" 3839 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3840 text += " WITH UNMATCHED ROWS" 3841 rows = exp.var(text) 3842 else: 3843 rows = None 3844 3845 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3846 text = "AFTER MATCH SKIP" 3847 if self._match_text_seq("PAST", "LAST", "ROW"): 3848 text += " PAST LAST ROW" 3849 elif self._match_text_seq("TO", "NEXT", "ROW"): 3850 text += " TO NEXT ROW" 3851 elif self._match_text_seq("TO", "FIRST"): 3852 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3853 elif self._match_text_seq("TO", "LAST"): 3854 text += f" TO LAST {self._advance_any().text}" # type: ignore 3855 after = exp.var(text) 3856 else: 3857 after = None 3858 3859 if self._match_text_seq("PATTERN"): 3860 self._match_l_paren() 3861 3862 if not self._curr: 3863 self.raise_error("Expecting )", self._curr) 3864 3865 paren = 1 3866 start = self._curr 3867 3868 while self._curr and paren > 0: 3869 if self._curr.token_type == TokenType.L_PAREN: 3870 paren += 1 3871 if self._curr.token_type == TokenType.R_PAREN: 3872 paren -= 1 3873 3874 end = self._prev 3875 self._advance() 3876 3877 if paren > 0: 3878 self.raise_error("Expecting )", self._curr) 3879 3880 pattern = exp.var(self._find_sql(start, end)) 3881 else: 3882 pattern = None 3883 3884 define = ( 3885 self._parse_csv(self._parse_name_as_expression) 3886 if self._match_text_seq("DEFINE") 3887 else None 3888 ) 3889 3890 self._match_r_paren() 3891 3892 return self.expression( 3893 exp.MatchRecognize, 3894 partition_by=partition, 3895 order=order, 3896 measures=measures, 3897 rows=rows, 3898 after=after, 3899 pattern=pattern, 3900 define=define, 3901 alias=self._parse_table_alias(), 3902 ) 3903 3904 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3905 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3906 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3907 cross_apply = False 3908 3909 if cross_apply is not None: 3910 this = self._parse_select(table=True) 3911 view = None 3912 outer = None 3913 elif self._match(TokenType.LATERAL): 3914 this = self._parse_select(table=True) 3915 view = self._match(TokenType.VIEW) 3916 outer = self._match(TokenType.OUTER) 3917 else: 3918 return None 3919 3920 if not this: 3921 this = ( 3922 self._parse_unnest() 3923 or self._parse_function() 3924 or self._parse_id_var(any_token=False) 3925 ) 3926 3927 while self._match(TokenType.DOT): 3928 this = exp.Dot( 3929 this=this, 3930 expression=self._parse_function() or self._parse_id_var(any_token=False), 3931 ) 3932 3933 ordinality: t.Optional[bool] = None 3934 3935 if view: 3936 table = self._parse_id_var(any_token=False) 3937 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3938 table_alias: t.Optional[exp.TableAlias] = 
self.expression( 3939 exp.TableAlias, this=table, columns=columns 3940 ) 3941 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3942 # We move the alias from the lateral's child node to the lateral itself 3943 table_alias = this.args["alias"].pop() 3944 else: 3945 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3946 table_alias = self._parse_table_alias() 3947 3948 return self.expression( 3949 exp.Lateral, 3950 this=this, 3951 view=view, 3952 outer=outer, 3953 alias=table_alias, 3954 cross_apply=cross_apply, 3955 ordinality=ordinality, 3956 ) 3957 3958 def _parse_stream(self) -> t.Optional[exp.Stream]: 3959 index = self._index 3960 if self._match_text_seq("STREAM"): 3961 this = self._try_parse(self._parse_table) 3962 if this: 3963 return self.expression(exp.Stream, this=this) 3964 3965 self._retreat(index) 3966 return None 3967 3968 def _parse_join_parts( 3969 self, 3970 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3971 return ( 3972 self._match_set(self.JOIN_METHODS) and self._prev, 3973 self._match_set(self.JOIN_SIDES) and self._prev, 3974 self._match_set(self.JOIN_KINDS) and self._prev, 3975 ) 3976 3977 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3978 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3979 this = self._parse_column() 3980 if isinstance(this, exp.Column): 3981 return this.this 3982 return this 3983 3984 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3985 3986 def _parse_join( 3987 self, skip_join_token: bool = False, parse_bracket: bool = False 3988 ) -> t.Optional[exp.Join]: 3989 if self._match(TokenType.COMMA): 3990 table = self._try_parse(self._parse_table) 3991 cross_join = self.expression(exp.Join, this=table) if table else None 3992 3993 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3994 cross_join.set("kind", "CROSS") 3995 3996 return cross_join 3997 3998 index = self._index 3999 method, side, kind = self._parse_join_parts() 4000 directed = self._match_text_seq("DIRECTED") 4001 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 4002 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 4003 join_comments = self._prev_comments 4004 4005 if not skip_join_token and not join: 4006 self._retreat(index) 4007 kind = None 4008 method = None 4009 side = None 4010 4011 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 4012 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 4013 4014 if not skip_join_token and not join and not outer_apply and not cross_apply: 4015 return None 4016 4017 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 4018 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 4019 kwargs["expressions"] = self._parse_csv( 4020 lambda: self._parse_table(parse_bracket=parse_bracket) 4021 ) 4022 4023 if method: 4024 kwargs["method"] = method.text.upper() 4025 if side: 4026 kwargs["side"] = side.text.upper() 4027 if kind: 4028 kwargs["kind"] = kind.text.upper() 4029 if hint: 4030 kwargs["hint"] = hint 4031 4032 if self._match(TokenType.MATCH_CONDITION): 4033 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 4034 4035 if self._match(TokenType.ON): 4036 kwargs["on"] = self._parse_disjunction() 4037 elif self._match(TokenType.USING): 4038 kwargs["using"] = self._parse_using_identifiers() 4039 elif ( 4040 not method 4041 and not (outer_apply or cross_apply) 
4042 and not isinstance(kwargs["this"], exp.Unnest) 4043 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 4044 ): 4045 index = self._index 4046 joins: t.Optional[list] = list(self._parse_joins()) 4047 4048 if joins and self._match(TokenType.ON): 4049 kwargs["on"] = self._parse_disjunction() 4050 elif joins and self._match(TokenType.USING): 4051 kwargs["using"] = self._parse_using_identifiers() 4052 else: 4053 joins = None 4054 self._retreat(index) 4055 4056 kwargs["this"].set("joins", joins if joins else None) 4057 4058 kwargs["pivots"] = self._parse_pivots() 4059 4060 comments = [c for token in (method, side, kind) if token for c in token.comments] 4061 comments = (join_comments or []) + comments 4062 4063 if ( 4064 self.ADD_JOIN_ON_TRUE 4065 and not kwargs.get("on") 4066 and not kwargs.get("using") 4067 and not kwargs.get("method") 4068 and kwargs.get("kind") in (None, "INNER", "OUTER") 4069 ): 4070 kwargs["on"] = exp.true() 4071 4072 if directed: 4073 kwargs["directed"] = directed 4074 4075 return self.expression(exp.Join, comments=comments, **kwargs) 4076 4077 def _parse_opclass(self) -> t.Optional[exp.Expression]: 4078 this = self._parse_disjunction() 4079 4080 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 4081 return this 4082 4083 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 4084 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 4085 4086 return this 4087 4088 def _parse_index_params(self) -> exp.IndexParameters: 4089 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 4090 4091 if self._match(TokenType.L_PAREN, advance=False): 4092 columns = self._parse_wrapped_csv(self._parse_with_operator) 4093 else: 4094 columns = None 4095 4096 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 4097 partition_by = self._parse_partition_by() 4098 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 4099 tablespace = ( 4100 self._parse_var(any_token=True) 4101 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 4102 else None 4103 ) 4104 where = self._parse_where() 4105 4106 on = self._parse_field() if self._match(TokenType.ON) else None 4107 4108 return self.expression( 4109 exp.IndexParameters, 4110 using=using, 4111 columns=columns, 4112 include=include, 4113 partition_by=partition_by, 4114 where=where, 4115 with_storage=with_storage, 4116 tablespace=tablespace, 4117 on=on, 4118 ) 4119 4120 def _parse_index( 4121 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 4122 ) -> t.Optional[exp.Index]: 4123 if index or anonymous: 4124 unique = None 4125 primary = None 4126 amp = None 4127 4128 self._match(TokenType.ON) 4129 self._match(TokenType.TABLE) # hive 4130 table = self._parse_table_parts(schema=True) 4131 else: 4132 unique = self._match(TokenType.UNIQUE) 4133 primary = self._match_text_seq("PRIMARY") 4134 amp = self._match_text_seq("AMP") 4135 4136 if not self._match(TokenType.INDEX): 4137 return None 4138 4139 index = self._parse_id_var() 4140 table = None 4141 4142 params = self._parse_index_params() 4143 4144 return self.expression( 4145 exp.Index, 4146 this=index, 4147 table=table, 4148 unique=unique, 4149 primary=primary, 4150 amp=amp, 4151 params=params, 4152 ) 4153 4154 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 4155 hints: t.List[exp.Expression] = [] 4156 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 4157 # 
https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 4158 hints.append( 4159 self.expression( 4160 exp.WithTableHint, 4161 expressions=self._parse_csv( 4162 lambda: self._parse_function() or self._parse_var(any_token=True) 4163 ), 4164 ) 4165 ) 4166 self._match_r_paren() 4167 else: 4168 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 4169 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 4170 hint = exp.IndexTableHint(this=self._prev.text.upper()) 4171 4172 self._match_set((TokenType.INDEX, TokenType.KEY)) 4173 if self._match(TokenType.FOR): 4174 hint.set("target", self._advance_any() and self._prev.text.upper()) 4175 4176 hint.set("expressions", self._parse_wrapped_id_vars()) 4177 hints.append(hint) 4178 4179 return hints or None 4180 4181 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4182 return ( 4183 (not schema and self._parse_function(optional_parens=False)) 4184 or self._parse_id_var(any_token=False) 4185 or self._parse_string_as_identifier() 4186 or self._parse_placeholder() 4187 ) 4188 4189 def _parse_table_parts( 4190 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4191 ) -> exp.Table: 4192 catalog = None 4193 db = None 4194 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4195 4196 while self._match(TokenType.DOT): 4197 if catalog: 4198 # This allows nesting the table in arbitrarily many dot expressions if needed 4199 table = self.expression( 4200 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4201 ) 4202 else: 4203 catalog = db 4204 db = table 4205 # "" used for tsql FROM a..b case 4206 table = self._parse_table_part(schema=schema) or "" 4207 4208 if ( 4209 wildcard 4210 and self._is_connected() 4211 and (isinstance(table, exp.Identifier) or not table) 4212 and self._match(TokenType.STAR) 4213 ): 4214 if isinstance(table, exp.Identifier): 4215 table.args["this"] += "*" 4216 else: 4217 table = exp.Identifier(this="*") 4218 4219 # We bubble up comments from the Identifier to the Table 4220 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4221 4222 if is_db_reference: 4223 catalog = db 4224 db = table 4225 table = None 4226 4227 if not table and not is_db_reference: 4228 self.raise_error(f"Expected table name but got {self._curr}") 4229 if not db and is_db_reference: 4230 self.raise_error(f"Expected database name but got {self._curr}") 4231 4232 table = self.expression( 4233 exp.Table, 4234 comments=comments, 4235 this=table, 4236 db=db, 4237 catalog=catalog, 4238 ) 4239 4240 changes = self._parse_changes() 4241 if changes: 4242 table.set("changes", changes) 4243 4244 at_before = self._parse_historical_data() 4245 if at_before: 4246 table.set("when", at_before) 4247 4248 pivots = self._parse_pivots() 4249 if pivots: 4250 table.set("pivots", pivots) 4251 4252 return table 4253 4254 def _parse_table( 4255 self, 4256 schema: bool = False, 4257 joins: bool = False, 4258 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4259 parse_bracket: bool = False, 4260 is_db_reference: bool = False, 4261 parse_partition: bool = False, 4262 consume_pipe: bool = False, 4263 ) -> t.Optional[exp.Expression]: 4264 stream = self._parse_stream() 4265 if stream: 4266 return stream 4267 4268 lateral = self._parse_lateral() 4269 if lateral: 4270 return lateral 4271 4272 unnest = self._parse_unnest() 4273 if unnest: 4274 return unnest 4275 4276 values = self._parse_derived_table_values() 
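        # Illustrative note, not part of the module (assumes only the public API and
        # the "from_" arg key used in this version): the specific table factors above
        # (STREAM, LATERAL, UNNEST, VALUES) are tried before falling back to a
        # subquery or a plain table reference, so a derived VALUES table surfaces as
        # exp.Values:
        #
        #     import sqlglot
        #     ast = sqlglot.parse_one("SELECT * FROM (VALUES (1), (2)) AS v(x)")
        #     assert isinstance(ast.args["from_"].this, sqlglot.exp.Values)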
4277 if values: 4278 return values 4279 4280 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4281 if subquery: 4282 if not subquery.args.get("pivots"): 4283 subquery.set("pivots", self._parse_pivots()) 4284 return subquery 4285 4286 bracket = parse_bracket and self._parse_bracket(None) 4287 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4288 4289 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4290 self._parse_table 4291 ) 4292 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4293 4294 only = self._match(TokenType.ONLY) 4295 4296 this = t.cast( 4297 exp.Expression, 4298 bracket 4299 or rows_from 4300 or self._parse_bracket( 4301 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4302 ), 4303 ) 4304 4305 if only: 4306 this.set("only", only) 4307 4308 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4309 self._match_text_seq("*") 4310 4311 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4312 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4313 this.set("partition", self._parse_partition()) 4314 4315 if schema: 4316 return self._parse_schema(this=this) 4317 4318 version = self._parse_version() 4319 4320 if version: 4321 this.set("version", version) 4322 4323 if self.dialect.ALIAS_POST_TABLESAMPLE: 4324 this.set("sample", self._parse_table_sample()) 4325 4326 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4327 if alias: 4328 this.set("alias", alias) 4329 4330 if self._match(TokenType.INDEXED_BY): 4331 this.set("indexed", self._parse_table_parts()) 4332 elif self._match_text_seq("NOT", "INDEXED"): 4333 this.set("indexed", False) 4334 4335 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4336 return self.expression( 4337 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4338 ) 4339 4340 this.set("hints", self._parse_table_hints()) 4341 4342 if not this.args.get("pivots"): 4343 this.set("pivots", self._parse_pivots()) 4344 4345 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4346 this.set("sample", self._parse_table_sample()) 4347 4348 if joins: 4349 for join in self._parse_joins(): 4350 this.append("joins", join) 4351 4352 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4353 this.set("ordinality", True) 4354 this.set("alias", self._parse_table_alias()) 4355 4356 return this 4357 4358 def _parse_version(self) -> t.Optional[exp.Version]: 4359 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4360 this = "TIMESTAMP" 4361 elif self._match(TokenType.VERSION_SNAPSHOT): 4362 this = "VERSION" 4363 else: 4364 return None 4365 4366 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4367 kind = self._prev.text.upper() 4368 start = self._parse_bitwise() 4369 self._match_texts(("TO", "AND")) 4370 end = self._parse_bitwise() 4371 expression: t.Optional[exp.Expression] = self.expression( 4372 exp.Tuple, expressions=[start, end] 4373 ) 4374 elif self._match_text_seq("CONTAINED", "IN"): 4375 kind = "CONTAINED IN" 4376 expression = self.expression( 4377 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4378 ) 4379 elif self._match(TokenType.ALL): 4380 kind = "ALL" 4381 expression = None 4382 else: 4383 self._match_text_seq("AS", "OF") 4384 kind = "AS OF" 4385 expression = self._parse_type() 4386 4387 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4388 4389 def 
_parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4390 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4391 index = self._index 4392 historical_data = None 4393 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4394 this = self._prev.text.upper() 4395 kind = ( 4396 self._match(TokenType.L_PAREN) 4397 and self._match_texts(self.HISTORICAL_DATA_KIND) 4398 and self._prev.text.upper() 4399 ) 4400 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4401 4402 if expression: 4403 self._match_r_paren() 4404 historical_data = self.expression( 4405 exp.HistoricalData, this=this, kind=kind, expression=expression 4406 ) 4407 else: 4408 self._retreat(index) 4409 4410 return historical_data 4411 4412 def _parse_changes(self) -> t.Optional[exp.Changes]: 4413 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4414 return None 4415 4416 information = self._parse_var(any_token=True) 4417 self._match_r_paren() 4418 4419 return self.expression( 4420 exp.Changes, 4421 information=information, 4422 at_before=self._parse_historical_data(), 4423 end=self._parse_historical_data(), 4424 ) 4425 4426 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4427 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4428 return None 4429 4430 self._advance() 4431 4432 expressions = self._parse_wrapped_csv(self._parse_equality) 4433 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4434 4435 alias = self._parse_table_alias() if with_alias else None 4436 4437 if alias: 4438 if self.dialect.UNNEST_COLUMN_ONLY: 4439 if alias.args.get("columns"): 4440 self.raise_error("Unexpected extra column alias in unnest.") 4441 4442 alias.set("columns", [alias.this]) 4443 alias.set("this", None) 4444 4445 columns = alias.args.get("columns") or [] 4446 if offset and len(expressions) < len(columns): 4447 offset = columns.pop() 4448 4449 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4450 self._match(TokenType.ALIAS) 4451 offset = self._parse_id_var( 4452 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4453 ) or exp.to_identifier("offset") 4454 4455 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4456 4457 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4458 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4459 if not is_derived and not ( 4460 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4461 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4462 ): 4463 return None 4464 4465 expressions = self._parse_csv(self._parse_value) 4466 alias = self._parse_table_alias() 4467 4468 if is_derived: 4469 self._match_r_paren() 4470 4471 return self.expression( 4472 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4473 ) 4474 4475 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4476 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4477 as_modifier and self._match_text_seq("USING", "SAMPLE") 4478 ): 4479 return None 4480 4481 bucket_numerator = None 4482 bucket_denominator = None 4483 bucket_field = None 4484 percent = None 4485 size = None 4486 seed = None 4487 4488 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4489 matched_l_paren = self._match(TokenType.L_PAREN) 4490 4491 if self.TABLESAMPLE_CSV: 4492 num = None 4493 expressions = self._parse_csv(self._parse_primary) 4494 else: 4495 expressions = None 
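            # Illustrative note, not part of the module (assumes only the public API
            # and the "from_" arg key used in this version): the TABLESAMPLE_CSV branch
            # above collects a list of sample arguments, while this branch parses a
            # single size or percent expression, e.g. for T-SQL:
            #
            #     import sqlglot
            #     ast = sqlglot.parse_one(
            #         "SELECT * FROM t TABLESAMPLE (10 PERCENT)", dialect="tsql"
            #     )
            #     sample = ast.args["from_"].this.args["sample"]
            #     assert sample.args.get("percent") is not None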
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ...
ON (col1, col2, col3) AS row_val 4566 return self._parse_alias(this) 4567 4568 return this 4569 4570 this = self._parse_table() 4571 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4572 into = self._parse_unpivot_columns() 4573 using = self._match(TokenType.USING) and self._parse_csv( 4574 lambda: self._parse_alias(self._parse_column()) 4575 ) 4576 group = self._parse_group() 4577 4578 return self.expression( 4579 exp.Pivot, 4580 this=this, 4581 expressions=expressions, 4582 using=using, 4583 group=group, 4584 unpivot=is_unpivot, 4585 into=into, 4586 ) 4587 4588 def _parse_pivot_in(self) -> exp.In: 4589 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4590 this = self._parse_select_or_expression() 4591 4592 self._match(TokenType.ALIAS) 4593 alias = self._parse_bitwise() 4594 if alias: 4595 if isinstance(alias, exp.Column) and not alias.db: 4596 alias = alias.this 4597 return self.expression(exp.PivotAlias, this=this, alias=alias) 4598 4599 return this 4600 4601 value = self._parse_column() 4602 4603 if not self._match(TokenType.IN): 4604 self.raise_error("Expecting IN") 4605 4606 if self._match(TokenType.L_PAREN): 4607 if self._match(TokenType.ANY): 4608 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4609 else: 4610 exprs = self._parse_csv(_parse_aliased_expression) 4611 self._match_r_paren() 4612 return self.expression(exp.In, this=value, expressions=exprs) 4613 4614 return self.expression(exp.In, this=value, field=self._parse_id_var()) 4615 4616 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4617 func = self._parse_function() 4618 if not func: 4619 if self._prev and self._prev.token_type == TokenType.COMMA: 4620 return None 4621 self.raise_error("Expecting an aggregation function in PIVOT") 4622 4623 return self._parse_alias(func) 4624 4625 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4626 index = self._index 4627 include_nulls = None 4628 4629 if self._match(TokenType.PIVOT): 4630 unpivot = False 4631 elif self._match(TokenType.UNPIVOT): 4632 unpivot = True 4633 4634 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4635 if self._match_text_seq("INCLUDE", "NULLS"): 4636 include_nulls = True 4637 elif self._match_text_seq("EXCLUDE", "NULLS"): 4638 include_nulls = False 4639 else: 4640 return None 4641 4642 expressions = [] 4643 4644 if not self._match(TokenType.L_PAREN): 4645 self._retreat(index) 4646 return None 4647 4648 if unpivot: 4649 expressions = self._parse_csv(self._parse_column) 4650 else: 4651 expressions = self._parse_csv(self._parse_pivot_aggregation) 4652 4653 if not expressions: 4654 self.raise_error("Failed to parse PIVOT's aggregation list") 4655 4656 if not self._match(TokenType.FOR): 4657 self.raise_error("Expecting FOR") 4658 4659 fields = [] 4660 while True: 4661 field = self._try_parse(self._parse_pivot_in) 4662 if not field: 4663 break 4664 fields.append(field) 4665 4666 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4667 self._parse_bitwise 4668 ) 4669 4670 group = self._parse_group() 4671 4672 self._match_r_paren() 4673 4674 pivot = self.expression( 4675 exp.Pivot, 4676 expressions=expressions, 4677 fields=fields, 4678 unpivot=unpivot, 4679 include_nulls=include_nulls, 4680 default_on_null=default_on_null, 4681 group=group, 4682 ) 4683 4684 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4685 pivot.set("alias", self._parse_table_alias()) 4686 4687 if 
not unpivot: 4688 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4689 4690 columns: t.List[exp.Expression] = [] 4691 all_fields = [] 4692 for pivot_field in pivot.fields: 4693 pivot_field_expressions = pivot_field.expressions 4694 4695 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4696 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4697 continue 4698 4699 all_fields.append( 4700 [ 4701 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4702 for fld in pivot_field_expressions 4703 ] 4704 ) 4705 4706 if all_fields: 4707 if names: 4708 all_fields.append(names) 4709 4710 # Generate all possible combinations of the pivot columns 4711 # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4712 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4713 for fld_parts_tuple in itertools.product(*all_fields): 4714 fld_parts = list(fld_parts_tuple) 4715 4716 if names and self.PREFIXED_PIVOT_COLUMNS: 4717 # Move the "name" to the front of the list 4718 fld_parts.insert(0, fld_parts.pop(-1)) 4719 4720 columns.append(exp.to_identifier("_".join(fld_parts))) 4721 4722 pivot.set("columns", columns) 4723 4724 return pivot 4725 4726 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4727 return [agg.alias for agg in aggregations if agg.alias] 4728 4729 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4730 if not skip_where_token and not self._match(TokenType.PREWHERE): 4731 return None 4732 4733 return self.expression( 4734 exp.PreWhere, comments=self._prev_comments, this=self._parse_disjunction() 4735 ) 4736 4737 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4738 if not skip_where_token and not self._match(TokenType.WHERE): 4739 return None 4740 4741 return self.expression( 4742 exp.Where, comments=self._prev_comments, this=self._parse_disjunction() 4743 ) 4744 4745 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4746 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4747 return None 4748 comments = self._prev_comments 4749 4750 elements: t.Dict[str, t.Any] = defaultdict(list) 4751 4752 if self._match(TokenType.ALL): 4753 elements["all"] = True 4754 elif self._match(TokenType.DISTINCT): 4755 elements["all"] = False 4756 4757 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4758 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4759 4760 while True: 4761 index = self._index 4762 4763 elements["expressions"].extend( 4764 self._parse_csv( 4765 lambda: None 4766 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4767 else self._parse_disjunction() 4768 ) 4769 ) 4770 4771 before_with_index = self._index 4772 with_prefix = self._match(TokenType.WITH) 4773 4774 if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix): 4775 key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube" 4776 elements[key].append(cube_or_rollup) 4777 elif grouping_sets := self._parse_grouping_sets(): 4778 elements["grouping_sets"].append(grouping_sets) 4779 elif self._match_text_seq("TOTALS"): 4780 elements["totals"] = True # type: ignore 4781 4782 if before_with_index <= self._index <= before_with_index + 1: 4783 self._retreat(before_with_index) 4784 break 4785 4786 if index == self._index: 4787 break 4788 4789 return
self.expression(exp.Group, comments=comments, **elements) # type: ignore 4790 4791 def _parse_cube_or_rollup(self, with_prefix: bool = False) -> t.Optional[exp.Cube | exp.Rollup]: 4792 if self._match(TokenType.CUBE): 4793 kind: t.Type[exp.Cube | exp.Rollup] = exp.Cube 4794 elif self._match(TokenType.ROLLUP): 4795 kind = exp.Rollup 4796 else: 4797 return None 4798 4799 return self.expression( 4800 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise) 4801 ) 4802 4803 def _parse_grouping_sets(self) -> t.Optional[exp.GroupingSets]: 4804 if self._match(TokenType.GROUPING_SETS): 4805 return self.expression( 4806 exp.GroupingSets, expressions=self._parse_wrapped_csv(self._parse_grouping_set) 4807 ) 4808 return None 4809 4810 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4811 return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise() 4812 4813 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4814 if not skip_having_token and not self._match(TokenType.HAVING): 4815 return None 4816 return self.expression( 4817 exp.Having, comments=self._prev_comments, this=self._parse_disjunction() 4818 ) 4819 4820 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4821 if not self._match(TokenType.QUALIFY): 4822 return None 4823 return self.expression(exp.Qualify, this=self._parse_disjunction()) 4824 4825 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4826 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4827 exp.Prior, this=self._parse_bitwise() 4828 ) 4829 connect = self._parse_disjunction() 4830 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4831 return connect 4832 4833 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4834 if skip_start_token: 4835 start = None 4836 elif self._match(TokenType.START_WITH): 4837 start = self._parse_disjunction() 4838 else: 4839 return None 4840 4841 self._match(TokenType.CONNECT_BY) 4842 nocycle = self._match_text_seq("NOCYCLE") 4843 connect = self._parse_connect_with_prior() 4844 4845 if not start and self._match(TokenType.START_WITH): 4846 start = self._parse_disjunction() 4847 4848 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4849 4850 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4851 this = self._parse_id_var(any_token=True) 4852 if self._match(TokenType.ALIAS): 4853 this = self.expression(exp.Alias, alias=this, this=self._parse_disjunction()) 4854 return this 4855 4856 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4857 if self._match_text_seq("INTERPOLATE"): 4858 return self._parse_wrapped_csv(self._parse_name_as_expression) 4859 return None 4860 4861 def _parse_order( 4862 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4863 ) -> t.Optional[exp.Expression]: 4864 siblings = None 4865 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4866 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4867 return this 4868 4869 siblings = True 4870 4871 return self.expression( 4872 exp.Order, 4873 comments=self._prev_comments, 4874 this=this, 4875 expressions=self._parse_csv(self._parse_ordered), 4876 siblings=siblings, 4877 ) 4878 4879 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4880 if not self._match(token): 4881 return None 4882 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4883 4884 def 
_parse_ordered( 4885 self, parse_method: t.Optional[t.Callable] = None 4886 ) -> t.Optional[exp.Ordered]: 4887 this = parse_method() if parse_method else self._parse_disjunction() 4888 if not this: 4889 return None 4890 4891 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4892 this = exp.var("ALL") 4893 4894 asc = self._match(TokenType.ASC) 4895 desc = self._match(TokenType.DESC) or (asc and False) 4896 4897 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4898 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4899 4900 nulls_first = is_nulls_first or False 4901 explicitly_null_ordered = is_nulls_first or is_nulls_last 4902 4903 if ( 4904 not explicitly_null_ordered 4905 and ( 4906 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4907 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4908 ) 4909 and self.dialect.NULL_ORDERING != "nulls_are_last" 4910 ): 4911 nulls_first = True 4912 4913 if self._match_text_seq("WITH", "FILL"): 4914 with_fill = self.expression( 4915 exp.WithFill, 4916 from_=self._match(TokenType.FROM) and self._parse_bitwise(), 4917 to=self._match_text_seq("TO") and self._parse_bitwise(), 4918 step=self._match_text_seq("STEP") and self._parse_bitwise(), 4919 interpolate=self._parse_interpolate(), 4920 ) 4921 else: 4922 with_fill = None 4923 4924 return self.expression( 4925 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4926 ) 4927 4928 def _parse_limit_options(self) -> t.Optional[exp.LimitOptions]: 4929 percent = self._match_set((TokenType.PERCENT, TokenType.MOD)) 4930 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4931 self._match_text_seq("ONLY") 4932 with_ties = self._match_text_seq("WITH", "TIES") 4933 4934 if not (percent or rows or with_ties): 4935 return None 4936 4937 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4938 4939 def _parse_limit( 4940 self, 4941 this: t.Optional[exp.Expression] = None, 4942 top: bool = False, 4943 skip_limit_token: bool = False, 4944 ) -> t.Optional[exp.Expression]: 4945 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4946 comments = self._prev_comments 4947 if top: 4948 limit_paren = self._match(TokenType.L_PAREN) 4949 expression = self._parse_term() if limit_paren else self._parse_number() 4950 4951 if limit_paren: 4952 self._match_r_paren() 4953 4954 else: 4955 # Parsing LIMIT x% (i.e. x PERCENT) as a term leads to an error, since 4956 # we try to build an exp.Mod expr.
To work around this, we backtrack and instead 4957 # consume the factor, then parse the percentage separately 4958 index = self._index 4959 expression = self._try_parse(self._parse_term) 4960 if isinstance(expression, exp.Mod): 4961 self._retreat(index) 4962 expression = self._parse_factor() 4963 elif not expression: 4964 expression = self._parse_factor() 4965 limit_options = self._parse_limit_options() 4966 4967 if self._match(TokenType.COMMA): 4968 offset = expression 4969 expression = self._parse_term() 4970 else: 4971 offset = None 4972 4973 limit_exp = self.expression( 4974 exp.Limit, 4975 this=this, 4976 expression=expression, 4977 offset=offset, 4978 comments=comments, 4979 limit_options=limit_options, 4980 expressions=self._parse_limit_by(), 4981 ) 4982 4983 return limit_exp 4984 4985 if self._match(TokenType.FETCH): 4986 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4987 direction = self._prev.text.upper() if direction else "FIRST" 4988 4989 count = self._parse_field(tokens=self.FETCH_TOKENS) 4990 4991 return self.expression( 4992 exp.Fetch, 4993 direction=direction, 4994 count=count, 4995 limit_options=self._parse_limit_options(), 4996 ) 4997 4998 return this 4999 5000 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5001 if not self._match(TokenType.OFFSET): 5002 return this 5003 5004 count = self._parse_term() 5005 self._match_set((TokenType.ROW, TokenType.ROWS)) 5006 5007 return self.expression( 5008 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 5009 ) 5010 5011 def _can_parse_limit_or_offset(self) -> bool: 5012 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 5013 return False 5014 5015 index = self._index 5016 result = bool( 5017 self._try_parse(self._parse_limit, retreat=True) 5018 or self._try_parse(self._parse_offset, retreat=True) 5019 ) 5020 self._retreat(index) 5021 5022 # MATCH_CONDITION (...)
is a special construct that should not be consumed by limit/offset 5023 if self._next and self._next.token_type == TokenType.MATCH_CONDITION: 5024 result = False 5025 5026 return result 5027 5028 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 5029 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 5030 5031 def _parse_locks(self) -> t.List[exp.Lock]: 5032 locks = [] 5033 while True: 5034 update, key = None, None 5035 if self._match_text_seq("FOR", "UPDATE"): 5036 update = True 5037 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 5038 "LOCK", "IN", "SHARE", "MODE" 5039 ): 5040 update = False 5041 elif self._match_text_seq("FOR", "KEY", "SHARE"): 5042 update, key = False, True 5043 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 5044 update, key = True, True 5045 else: 5046 break 5047 5048 expressions = None 5049 if self._match_text_seq("OF"): 5050 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 5051 5052 wait: t.Optional[bool | exp.Expression] = None 5053 if self._match_text_seq("NOWAIT"): 5054 wait = True 5055 elif self._match_text_seq("WAIT"): 5056 wait = self._parse_primary() 5057 elif self._match_text_seq("SKIP", "LOCKED"): 5058 wait = False 5059 5060 locks.append( 5061 self.expression( 5062 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 5063 ) 5064 ) 5065 5066 return locks 5067 5068 def parse_set_operation( 5069 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 5070 ) -> t.Optional[exp.Expression]: 5071 start = self._index 5072 _, side_token, kind_token = self._parse_join_parts() 5073 5074 side = side_token.text if side_token else None 5075 kind = kind_token.text if kind_token else None 5076 5077 if not self._match_set(self.SET_OPERATIONS): 5078 self._retreat(start) 5079 return None 5080 5081 token_type = self._prev.token_type 5082 5083 if token_type == TokenType.UNION: 5084 operation: t.Type[exp.SetOperation] = exp.Union 5085 elif token_type == TokenType.EXCEPT: 5086 operation = exp.Except 5087 else: 5088 operation = exp.Intersect 5089 5090 comments = self._prev.comments 5091 5092 if self._match(TokenType.DISTINCT): 5093 distinct: t.Optional[bool] = True 5094 elif self._match(TokenType.ALL): 5095 distinct = False 5096 else: 5097 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 5098 if distinct is None: 5099 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 5100 5101 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 5102 "STRICT", "CORRESPONDING" 5103 ) 5104 if self._match_text_seq("CORRESPONDING"): 5105 by_name = True 5106 if not side and not kind: 5107 kind = "INNER" 5108 5109 on_column_list = None 5110 if by_name and self._match_texts(("ON", "BY")): 5111 on_column_list = self._parse_wrapped_csv(self._parse_column) 5112 5113 expression = self._parse_select( 5114 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 5115 ) 5116 5117 return self.expression( 5118 operation, 5119 comments=comments, 5120 this=this, 5121 distinct=distinct, 5122 by_name=by_name, 5123 expression=expression, 5124 side=side, 5125 kind=kind, 5126 on=on_column_list, 5127 ) 5128 5129 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5130 while this: 5131 setop = self.parse_set_operation(this) 5132 if not setop: 5133 break 5134 this = setop 5135 5136 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 5137 expression = this.expression 5138 5139 if 
expression: 5140 for arg in self.SET_OP_MODIFIERS: 5141 expr = expression.args.get(arg) 5142 if expr: 5143 this.set(arg, expr.pop()) 5144 5145 return this 5146 5147 def _parse_expression(self) -> t.Optional[exp.Expression]: 5148 return self._parse_alias(self._parse_assignment()) 5149 5150 def _parse_assignment(self) -> t.Optional[exp.Expression]: 5151 this = self._parse_disjunction() 5152 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 5153 # This allows us to parse <non-identifier token> := <expr> 5154 this = exp.column( 5155 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 5156 ) 5157 5158 while self._match_set(self.ASSIGNMENT): 5159 if isinstance(this, exp.Column) and len(this.parts) == 1: 5160 this = this.this 5161 5162 this = self.expression( 5163 self.ASSIGNMENT[self._prev.token_type], 5164 this=this, 5165 comments=self._prev_comments, 5166 expression=self._parse_assignment(), 5167 ) 5168 5169 return this 5170 5171 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 5172 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 5173 5174 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 5175 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 5176 5177 def _parse_equality(self) -> t.Optional[exp.Expression]: 5178 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 5179 5180 def _parse_comparison(self) -> t.Optional[exp.Expression]: 5181 return self._parse_tokens(self._parse_range, self.COMPARISON) 5182 5183 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5184 this = this or self._parse_bitwise() 5185 negate = self._match(TokenType.NOT) 5186 5187 if self._match_set(self.RANGE_PARSERS): 5188 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 5189 if not expression: 5190 return this 5191 5192 this = expression 5193 elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)): 5194 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5195 5196 # Postgres supports ISNULL and NOTNULL for conditions. 
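# A hedged doctest-style sketch of the ISNULL/NOTNULL handling below; it assumes
# a stock sqlglot install (both operators normalize to IS NULL nodes, with
# NOTNULL additionally wrapped in a NOT):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> parsed = sqlglot.parse_one("SELECT x NOTNULL", read="postgres")
#     >>> parsed.find(exp.Not) is not None and parsed.find(exp.Is) is not None
#     True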
5197 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5198 if self._match(TokenType.NOTNULL): 5199 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5200 this = self.expression(exp.Not, this=this) 5201 5202 if negate: 5203 this = self._negate_range(this) 5204 5205 if self._match(TokenType.IS): 5206 this = self._parse_is(this) 5207 5208 return this 5209 5210 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5211 if not this: 5212 return this 5213 5214 return self.expression(exp.Not, this=this) 5215 5216 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5217 index = self._index - 1 5218 negate = self._match(TokenType.NOT) 5219 5220 if self._match_text_seq("DISTINCT", "FROM"): 5221 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5222 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5223 5224 if self._match(TokenType.JSON): 5225 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5226 5227 if self._match_text_seq("WITH"): 5228 _with = True 5229 elif self._match_text_seq("WITHOUT"): 5230 _with = False 5231 else: 5232 _with = None 5233 5234 unique = self._match(TokenType.UNIQUE) 5235 self._match_text_seq("KEYS") 5236 expression: t.Optional[exp.Expression] = self.expression( 5237 exp.JSON, 5238 this=kind, 5239 with_=_with, 5240 unique=unique, 5241 ) 5242 else: 5243 expression = self._parse_null() or self._parse_bitwise() 5244 if not expression: 5245 self._retreat(index) 5246 return None 5247 5248 this = self.expression(exp.Is, this=this, expression=expression) 5249 this = self.expression(exp.Not, this=this) if negate else this 5250 return self._parse_column_ops(this) 5251 5252 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5253 unnest = self._parse_unnest(with_alias=False) 5254 if unnest: 5255 this = self.expression(exp.In, this=this, unnest=unnest) 5256 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5257 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5258 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5259 5260 if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query): 5261 this = self.expression( 5262 exp.In, 5263 this=this, 5264 query=self._parse_query_modifiers(query).subquery(copy=False), 5265 ) 5266 else: 5267 this = self.expression(exp.In, this=this, expressions=expressions) 5268 5269 if matched_l_paren: 5270 self._match_r_paren(this) 5271 elif not self._match(TokenType.R_BRACKET, expression=this): 5272 self.raise_error("Expecting ]") 5273 else: 5274 this = self.expression(exp.In, this=this, field=self._parse_column()) 5275 5276 return this 5277 5278 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5279 symmetric = None 5280 if self._match_text_seq("SYMMETRIC"): 5281 symmetric = True 5282 elif self._match_text_seq("ASYMMETRIC"): 5283 symmetric = False 5284 5285 low = self._parse_bitwise() 5286 self._match(TokenType.AND) 5287 high = self._parse_bitwise() 5288 5289 return self.expression( 5290 exp.Between, 5291 this=this, 5292 low=low, 5293 high=high, 5294 symmetric=symmetric, 5295 ) 5296 5297 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5298 if not self._match(TokenType.ESCAPE): 5299 return this 5300 return self.expression( 5301 exp.Escape, this=this, expression=self._parse_string() or self._parse_null() 5302 ) 5303 5304 def 
_parse_interval_span(self, this: exp.Expression) -> exp.Interval: 5305 # handle day-time format interval span with omitted units: 5306 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5307 interval_span_units_omitted = None 5308 if ( 5309 this 5310 and this.is_string 5311 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5312 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5313 ): 5314 index = self._index 5315 5316 # Var "TO" Var 5317 first_unit = self._parse_var(any_token=True, upper=True) 5318 second_unit = None 5319 if first_unit and self._match_text_seq("TO"): 5320 second_unit = self._parse_var(any_token=True, upper=True) 5321 5322 interval_span_units_omitted = not (first_unit and second_unit) 5323 5324 self._retreat(index) 5325 5326 unit = ( 5327 None 5328 if interval_span_units_omitted 5329 else ( 5330 self._parse_function() 5331 or ( 5332 not self._match(TokenType.ALIAS, advance=False) 5333 and self._parse_var(any_token=True, upper=True) 5334 ) 5335 ) 5336 ) 5337 5338 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5339 # each INTERVAL expression into this canonical form so it's easy to transpile 5340 if this and this.is_number: 5341 this = exp.Literal.string(this.to_py()) 5342 elif this and this.is_string: 5343 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5344 if parts and unit: 5345 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5346 unit = None 5347 self._retreat(self._index - 1) 5348 5349 if len(parts) == 1: 5350 this = exp.Literal.string(parts[0][0]) 5351 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5352 5353 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5354 unit = self.expression( 5355 exp.IntervalSpan, 5356 this=unit, 5357 expression=self._parse_function() or self._parse_var(any_token=True, upper=True), 5358 ) 5359 5360 return self.expression(exp.Interval, this=this, unit=unit) 5361 5362 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5363 index = self._index 5364 5365 if not self._match(TokenType.INTERVAL) and match_interval: 5366 return None 5367 5368 if self._match(TokenType.STRING, advance=False): 5369 this = self._parse_primary() 5370 else: 5371 this = self._parse_term() 5372 5373 if not this or ( 5374 isinstance(this, exp.Column) 5375 and not this.table 5376 and not this.this.quoted 5377 and self._curr 5378 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5379 ): 5380 self._retreat(index) 5381 return None 5382 5383 interval = self._parse_interval_span(this) 5384 5385 index = self._index 5386 self._match(TokenType.PLUS) 5387 5388 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5389 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5390 return self.expression( 5391 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5392 ) 5393 5394 self._retreat(index) 5395 return interval 5396 5397 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5398 this = self._parse_term() 5399 5400 while True: 5401 if self._match_set(self.BITWISE): 5402 this = self.expression( 5403 self.BITWISE[self._prev.token_type], 5404 this=this, 5405 expression=self._parse_term(), 5406 ) 5407 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5408 this = self.expression( 5409 exp.DPipe, 5410 this=this, 5411 expression=self._parse_term(), 5412 safe=not self.dialect.STRICT_STRING_CONCAT, 5413 ) 5414 elif self._match(TokenType.DQMARK): 5415 this = self.expression( 5416 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5417 ) 5418 elif self._match_pair(TokenType.LT, TokenType.LT): 5419 this = self.expression( 5420 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5421 ) 5422 elif self._match_pair(TokenType.GT, TokenType.GT): 5423 this = self.expression( 5424 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5425 ) 5426 else: 5427 break 5428 5429 return this 5430 5431 def _parse_term(self) -> t.Optional[exp.Expression]: 5432 this = self._parse_factor() 5433 5434 while self._match_set(self.TERM): 5435 klass = self.TERM[self._prev.token_type] 5436 comments = self._prev_comments 5437 expression = self._parse_factor() 5438 5439 this = self.expression(klass, this=this, comments=comments, expression=expression) 5440 5441 if isinstance(this, exp.Collate): 5442 expr = this.expression 5443 5444 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5445 # fallback to Identifier / Var 5446 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5447 ident = expr.this 5448 if isinstance(ident, exp.Identifier): 5449 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5450 5451 return this 5452 5453 def _parse_factor(self) -> t.Optional[exp.Expression]: 5454 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5455 this = self._parse_at_time_zone(parse_method()) 5456 5457 while self._match_set(self.FACTOR): 5458 klass = self.FACTOR[self._prev.token_type] 5459 comments = self._prev_comments 5460 expression = parse_method() 5461 5462 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5463 self._retreat(self._index - 1) 5464 return this 5465 5466 this = self.expression(klass, this=this, comments=comments, expression=expression) 5467 5468 if isinstance(this, exp.Div): 5469 this.set("typed", self.dialect.TYPED_DIVISION) 5470 this.set("safe", self.dialect.SAFE_DIVISION) 5471 5472 return this 5473 5474 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5475 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5476 5477 def _parse_unary(self) -> t.Optional[exp.Expression]: 5478 if self._match_set(self.UNARY_PARSERS): 5479 return self.UNARY_PARSERS[self._prev.token_type](self) 5480 return self._parse_type() 5481 5482 def _parse_type( 5483 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5484 ) -> t.Optional[exp.Expression]: 5485 interval = parse_interval and self._parse_interval() 5486 if interval: 5487 return self._parse_column_ops(interval) 5488 5489 index = self._index 5490 data_type = self._parse_types(check_func=True, 
allow_identifiers=False) 5491 5492 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5493 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5494 if isinstance(data_type, exp.Cast): 5495 # This constructor can contain ops directly after it, for instance struct unnesting: 5496 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5497 return self._parse_column_ops(data_type) 5498 5499 if data_type: 5500 index2 = self._index 5501 this = self._parse_primary() 5502 5503 if isinstance(this, exp.Literal): 5504 literal = this.name 5505 this = self._parse_column_ops(this) 5506 5507 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5508 if parser: 5509 return parser(self, this, data_type) 5510 5511 if ( 5512 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5513 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5514 and TIME_ZONE_RE.search(literal) 5515 ): 5516 data_type = exp.DataType.build("TIMESTAMPTZ") 5517 5518 return self.expression(exp.Cast, this=this, to=data_type) 5519 5520 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5521 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5522 # 5523 # If the index difference here is greater than 1, that means the parser itself must have 5524 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5525 # 5526 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5527 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5528 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5529 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5530 # 5531 # In these cases, we don't really want to return the converted type, but instead retreat 5532 # and try to parse a Column or Identifier in the section below.
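# A hedged doctest-style sketch of the inline-constructor branch earlier in this
# method; it assumes a stock sqlglot install. Per the comment above that branch,
# BigQuery's typed constructor comes back canonicalized as an exp.Cast:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> node = sqlglot.parse_one("SELECT STRUCT<a INT64>(1)", read="bigquery")
#     >>> isinstance(node.selects[0], exp.Cast)
#     True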
5533 if data_type.expressions and index2 - index > 1: 5534 self._retreat(index2) 5535 return self._parse_column_ops(data_type) 5536 5537 self._retreat(index) 5538 5539 if fallback_to_identifier: 5540 return self._parse_id_var() 5541 5542 this = self._parse_column() 5543 return this and self._parse_column_ops(this) 5544 5545 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5546 this = self._parse_type() 5547 if not this: 5548 return None 5549 5550 if isinstance(this, exp.Column) and not this.table: 5551 this = exp.var(this.name.upper()) 5552 5553 return self.expression( 5554 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5555 ) 5556 5557 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5558 type_name = identifier.name 5559 5560 while self._match(TokenType.DOT): 5561 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5562 5563 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5564 5565 def _parse_types( 5566 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5567 ) -> t.Optional[exp.Expression]: 5568 index = self._index 5569 5570 this: t.Optional[exp.Expression] = None 5571 prefix = self._match_text_seq("SYSUDTLIB", ".") 5572 5573 if self._match_set(self.TYPE_TOKENS): 5574 type_token = self._prev.token_type 5575 else: 5576 type_token = None 5577 identifier = allow_identifiers and self._parse_id_var( 5578 any_token=False, tokens=(TokenType.VAR,) 5579 ) 5580 if isinstance(identifier, exp.Identifier): 5581 try: 5582 tokens = self.dialect.tokenize(identifier.name) 5583 except TokenError: 5584 tokens = None 5585 5586 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5587 type_token = tokens[0].token_type 5588 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5589 this = self._parse_user_defined_type(identifier) 5590 else: 5591 self._retreat(self._index - 1) 5592 return None 5593 else: 5594 return None 5595 5596 if type_token == TokenType.PSEUDO_TYPE: 5597 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5598 5599 if type_token == TokenType.OBJECT_IDENTIFIER: 5600 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5601 5602 # https://materialize.com/docs/sql/types/map/ 5603 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5604 key_type = self._parse_types( 5605 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5606 ) 5607 if not self._match(TokenType.FARROW): 5608 self._retreat(index) 5609 return None 5610 5611 value_type = self._parse_types( 5612 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5613 ) 5614 if not self._match(TokenType.R_BRACKET): 5615 self._retreat(index) 5616 return None 5617 5618 return exp.DataType( 5619 this=exp.DataType.Type.MAP, 5620 expressions=[key_type, value_type], 5621 nested=True, 5622 prefix=prefix, 5623 ) 5624 5625 nested = type_token in self.NESTED_TYPE_TOKENS 5626 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5627 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5628 expressions = None 5629 maybe_func = False 5630 5631 if self._match(TokenType.L_PAREN): 5632 if is_struct: 5633 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5634 elif nested: 5635 expressions = self._parse_csv( 5636 lambda: self._parse_types( 5637 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5638 ) 5639 ) 5640 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5641 this = expressions[0] 5642 this.set("nullable", True) 5643 self._match_r_paren() 5644 return this 5645 elif type_token in self.ENUM_TYPE_TOKENS: 5646 expressions = self._parse_csv(self._parse_equality) 5647 elif is_aggregate: 5648 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5649 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5650 ) 5651 if not func_or_ident: 5652 return None 5653 expressions = [func_or_ident] 5654 if self._match(TokenType.COMMA): 5655 expressions.extend( 5656 self._parse_csv( 5657 lambda: self._parse_types( 5658 check_func=check_func, 5659 schema=schema, 5660 allow_identifiers=allow_identifiers, 5661 ) 5662 ) 5663 ) 5664 else: 5665 expressions = self._parse_csv(self._parse_type_size) 5666 5667 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5668 if type_token == TokenType.VECTOR and len(expressions) == 2: 5669 expressions = self._parse_vector_expressions(expressions) 5670 5671 if not self._match(TokenType.R_PAREN): 5672 self._retreat(index) 5673 return None 5674 5675 maybe_func = True 5676 5677 values: t.Optional[t.List[exp.Expression]] = None 5678 5679 if nested and self._match(TokenType.LT): 5680 if is_struct: 5681 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5682 else: 5683 expressions = self._parse_csv( 5684 lambda: self._parse_types( 5685 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5686 ) 5687 ) 5688 5689 if not self._match(TokenType.GT): 5690 self.raise_error("Expecting >") 5691 5692 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5693 values = self._parse_csv(self._parse_disjunction) 5694 if not values and is_struct: 5695 values = None 5696 self._retreat(self._index - 1) 5697 else: 5698 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5699 5700 if type_token in self.TIMESTAMPS: 5701 if self._match_text_seq("WITH", "TIME", "ZONE"): 5702 maybe_func = False 5703 tz_type = ( 5704 exp.DataType.Type.TIMETZ 5705 if type_token in self.TIMES 5706 else exp.DataType.Type.TIMESTAMPTZ 5707 ) 5708 this = exp.DataType(this=tz_type, expressions=expressions) 5709 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5710 maybe_func = False 5711 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5712 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5713 maybe_func = False 5714 elif type_token == TokenType.INTERVAL: 5715 if self._curr and self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS: 5716 unit = self._parse_var(upper=True) 5717 if self._match_text_seq("TO"): 5718 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5719 5720 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5721 else: 5722 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5723 elif type_token == TokenType.VOID: 5724 this = exp.DataType(this=exp.DataType.Type.NULL) 5725 5726 if maybe_func and check_func: 5727 index2 = self._index 5728 peek = self._parse_string() 5729 5730 if not peek: 5731 self._retreat(index) 5732 return None 5733 5734 self._retreat(index2) 5735 5736 if not this: 5737 if self._match_text_seq("UNSIGNED"): 5738 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5739 if not unsigned_type_token: 5740 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5741 5742 type_token = unsigned_type_token or type_token 5743 5744 # NULLABLE without 
parentheses can be a column (Presto/Trino) 5745 if type_token == TokenType.NULLABLE and not expressions: 5746 self._retreat(index) 5747 return None 5748 5749 this = exp.DataType( 5750 this=exp.DataType.Type[type_token.value], 5751 expressions=expressions, 5752 nested=nested, 5753 prefix=prefix, 5754 ) 5755 5756 # Empty arrays/structs are allowed 5757 if values is not None: 5758 cls = exp.Struct if is_struct else exp.Array 5759 this = exp.cast(cls(expressions=values), this, copy=False) 5760 5761 elif expressions: 5762 this.set("expressions", expressions) 5763 5764 # https://materialize.com/docs/sql/types/list/#type-name 5765 while self._match(TokenType.LIST): 5766 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5767 5768 index = self._index 5769 5770 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5771 matched_array = self._match(TokenType.ARRAY) 5772 5773 while self._curr: 5774 datatype_token = self._prev.token_type 5775 matched_l_bracket = self._match(TokenType.L_BRACKET) 5776 5777 if (not matched_l_bracket and not matched_array) or ( 5778 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5779 ): 5780 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5781 # not to be confused with the fixed size array parsing 5782 break 5783 5784 matched_array = False 5785 values = self._parse_csv(self._parse_disjunction) or None 5786 if ( 5787 values 5788 and not schema 5789 and ( 5790 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS 5791 or datatype_token == TokenType.ARRAY 5792 or not self._match(TokenType.R_BRACKET, advance=False) 5793 ) 5794 ): 5795 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5796 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5797 self._retreat(index) 5798 break 5799 5800 this = exp.DataType( 5801 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5802 ) 5803 self._match(TokenType.R_BRACKET) 5804 5805 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5806 converter = self.TYPE_CONVERTERS.get(this.this) 5807 if converter: 5808 this = converter(t.cast(exp.DataType, this)) 5809 5810 return this 5811 5812 def _parse_vector_expressions( 5813 self, expressions: t.List[exp.Expression] 5814 ) -> t.List[exp.Expression]: 5815 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5816 5817 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5818 index = self._index 5819 5820 if ( 5821 self._curr 5822 and self._next 5823 and self._curr.token_type in self.TYPE_TOKENS 5824 and self._next.token_type in self.TYPE_TOKENS 5825 ): 5826 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5827 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5828 this = self._parse_id_var() 5829 else: 5830 this = ( 5831 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5832 or self._parse_id_var() 5833 ) 5834 5835 self._match(TokenType.COLON) 5836 5837 if ( 5838 type_required 5839 and not isinstance(this, exp.DataType) 5840 and not self._match_set(self.TYPE_TOKENS, advance=False) 5841 ): 5842 self._retreat(index) 5843 return self._parse_types() 5844 5845 return self._parse_column_def(this) 5846 5847 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5848 if not self._match_text_seq("AT", "TIME", "ZONE"): 5849 return this 5850 return self._parse_at_time_zone( 5851 self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5852 ) 5853 5854 def _parse_column(self) -> t.Optional[exp.Expression]: 5855 this = self._parse_column_reference() 5856 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5857 5858 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5859 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5860 5861 return column 5862 5863 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5864 this = self._parse_field() 5865 if ( 5866 not this 5867 and self._match(TokenType.VALUES, advance=False) 5868 and self.VALUES_FOLLOWED_BY_PAREN 5869 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5870 ): 5871 this = self._parse_id_var() 5872 5873 if isinstance(this, exp.Identifier): 5874 # We bubble up comments from the Identifier to the Column 5875 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5876 5877 return this 5878 5879 def _parse_colon_as_variant_extract( 5880 self, this: t.Optional[exp.Expression] 5881 ) -> t.Optional[exp.Expression]: 5882 casts = [] 5883 json_path = [] 5884 escape = None 5885 5886 while self._match(TokenType.COLON): 5887 start_index = self._index 5888 5889 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5890 path = self._parse_column_ops( 5891 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5892 ) 5893 5894 # The cast :: operator has a lower precedence than the extraction operator :, so 5895 # we rearrange the AST appropriately to avoid casting the JSON path 5896 while isinstance(path, exp.Cast): 5897 casts.append(path.to) 5898 path = path.this 5899 5900 if casts: 5901 dcolon_offset = next( 5902 i 5903 for i, t in enumerate(self._tokens[start_index:]) 5904 if t.token_type == TokenType.DCOLON 5905 ) 5906 end_token = self._tokens[start_index + dcolon_offset - 1] 5907 else: 5908 end_token = self._prev 5909 5910 if path: 5911 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5912 # it'll roundtrip to a string literal in GET_PATH 5913 if isinstance(path, exp.Identifier) and path.quoted: 5914 escape = True 5915 5916 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5917 5918 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5919 # Databricks transforms it back to the colon/dot notation 5920 if json_path: 5921 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5922 5923 if json_path_expr: 5924 json_path_expr.set("escape", escape) 5925 5926 this = self.expression( 5927 exp.JSONExtract, 5928 this=this, 5929 expression=json_path_expr, 5930 variant_extract=True, 5931 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5932 ) 5933 5934 while casts: 5935 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5936 5937 return this 5938 5939 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5940 return self._parse_types() 5941 5942 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5943 this = self._parse_bracket(this) 5944 5945 while self._match_set(self.COLUMN_OPERATORS): 5946 op_token = self._prev.token_type 5947 op = self.COLUMN_OPERATORS.get(op_token) 5948 5949 if op_token in self.CAST_COLUMN_OPERATORS: 5950 field = self._parse_dcolon() 5951 if not field: 5952 self.raise_error("Expected type") 5953 elif op and self._curr: 5954 field = self._parse_column_reference() or self._parse_bitwise() 5955 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5956 field = self._parse_column_ops(field) 5957 else: 5958 field = self._parse_field(any_token=True, anonymous_func=True) 5959 5960 # Function calls can be qualified, e.g., x.y.FOO() 5961 # This converts the final AST to a series of Dots leading to the function call 5962 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5963 if isinstance(field, (exp.Func, exp.Window)) and this: 5964 this = this.transform( 5965 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5966 ) 5967 5968 if op: 5969 this = op(self, this, field) 5970 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5971 this = self.expression( 5972 exp.Column, 5973 comments=this.comments, 5974 this=field, 5975 table=this.this, 5976 db=this.args.get("table"), 5977 catalog=this.args.get("db"), 5978 ) 5979 elif isinstance(field, exp.Window): 5980 # Move the exp.Dot's to the window's function 5981 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5982 field.set("this", window_func) 5983 this = field 5984 else: 5985 this = self.expression(exp.Dot, this=this, expression=field) 5986 5987 if field and field.comments: 5988 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5989 5990 this = self._parse_bracket(this) 5991 5992 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5993 5994 def _parse_paren(self) -> t.Optional[exp.Expression]: 5995 if not self._match(TokenType.L_PAREN): 5996 return None 5997 5998 comments = self._prev_comments 5999 query = self._parse_select() 6000 6001 if query: 6002 expressions = [query] 6003 else: 6004 expressions = self._parse_expressions() 6005 6006 this = seq_get(expressions, 0) 6007 6008 if not this and self._match(TokenType.R_PAREN, advance=False): 6009 this = self.expression(exp.Tuple) 6010 elif isinstance(this, exp.UNWRAPPED_QUERIES): 6011 this = 
self._parse_subquery(this=this, parse_alias=False) 6012 elif isinstance(this, (exp.Subquery, exp.Values)): 6013 this = self._parse_subquery( 6014 this=self._parse_query_modifiers(self._parse_set_operations(this)), 6015 parse_alias=False, 6016 ) 6017 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 6018 this = self.expression(exp.Tuple, expressions=expressions) 6019 else: 6020 this = self.expression(exp.Paren, this=this) 6021 6022 if this: 6023 this.add_comments(comments) 6024 6025 self._match_r_paren(expression=this) 6026 6027 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 6028 return self._parse_window(this) 6029 6030 return this 6031 6032 def _parse_primary(self) -> t.Optional[exp.Expression]: 6033 if self._match_set(self.PRIMARY_PARSERS): 6034 token_type = self._prev.token_type 6035 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 6036 6037 if token_type == TokenType.STRING: 6038 expressions = [primary] 6039 while self._match(TokenType.STRING): 6040 expressions.append(exp.Literal.string(self._prev.text)) 6041 6042 if len(expressions) > 1: 6043 return self.expression( 6044 exp.Concat, expressions=expressions, coalesce=self.dialect.CONCAT_COALESCE 6045 ) 6046 6047 return primary 6048 6049 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 6050 return exp.Literal.number(f"0.{self._prev.text}") 6051 6052 return self._parse_paren() 6053 6054 def _parse_field( 6055 self, 6056 any_token: bool = False, 6057 tokens: t.Optional[t.Collection[TokenType]] = None, 6058 anonymous_func: bool = False, 6059 ) -> t.Optional[exp.Expression]: 6060 if anonymous_func: 6061 field = ( 6062 self._parse_function(anonymous=anonymous_func, any_token=any_token) 6063 or self._parse_primary() 6064 ) 6065 else: 6066 field = self._parse_primary() or self._parse_function( 6067 anonymous=anonymous_func, any_token=any_token 6068 ) 6069 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 6070 6071 def _parse_function( 6072 self, 6073 functions: t.Optional[t.Dict[str, t.Callable]] = None, 6074 anonymous: bool = False, 6075 optional_parens: bool = True, 6076 any_token: bool = False, 6077 ) -> t.Optional[exp.Expression]: 6078 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 6079 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 6080 fn_syntax = False 6081 if ( 6082 self._match(TokenType.L_BRACE, advance=False) 6083 and self._next 6084 and self._next.text.upper() == "FN" 6085 ): 6086 self._advance(2) 6087 fn_syntax = True 6088 6089 func = self._parse_function_call( 6090 functions=functions, 6091 anonymous=anonymous, 6092 optional_parens=optional_parens, 6093 any_token=any_token, 6094 ) 6095 6096 if fn_syntax: 6097 self._match(TokenType.R_BRACE) 6098 6099 return func 6100 6101 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 6102 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 6103 6104 def _parse_function_call( 6105 self, 6106 functions: t.Optional[t.Dict[str, t.Callable]] = None, 6107 anonymous: bool = False, 6108 optional_parens: bool = True, 6109 any_token: bool = False, 6110 ) -> t.Optional[exp.Expression]: 6111 if not self._curr: 6112 return None 6113 6114 comments = self._curr.comments 6115 prev = self._prev 6116 token = self._curr 6117 token_type = self._curr.token_type 6118 this = self._curr.text 6119 upper = this.upper() 6120 6121 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 6122 if optional_parens and parser and token_type not in 
self.INVALID_FUNC_NAME_TOKENS: 6123 self._advance() 6124 return self._parse_window(parser(self)) 6125 6126 if not self._next or self._next.token_type != TokenType.L_PAREN: 6127 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 6128 self._advance() 6129 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 6130 6131 return None 6132 6133 if any_token: 6134 if token_type in self.RESERVED_TOKENS: 6135 return None 6136 elif token_type not in self.FUNC_TOKENS: 6137 return None 6138 6139 self._advance(2) 6140 6141 parser = self.FUNCTION_PARSERS.get(upper) 6142 if parser and not anonymous: 6143 this = parser(self) 6144 else: 6145 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 6146 6147 if subquery_predicate: 6148 expr = None 6149 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 6150 expr = self._parse_select() 6151 self._match_r_paren() 6152 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 6153 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 6154 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 6155 self._advance(-1) 6156 expr = self._parse_bitwise() 6157 6158 if expr: 6159 return self.expression(subquery_predicate, comments=comments, this=expr) 6160 6161 if functions is None: 6162 functions = self.FUNCTIONS 6163 6164 function = functions.get(upper) 6165 known_function = function and not anonymous 6166 6167 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 6168 args = self._parse_function_args(alias) 6169 6170 post_func_comments = self._curr and self._curr.comments 6171 if known_function and post_func_comments: 6172 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 6173 # call we'll construct it as exp.Anonymous, even if it's "known" 6174 if any( 6175 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 6176 for comment in post_func_comments 6177 ): 6178 known_function = False 6179 6180 if alias and known_function: 6181 args = self._kv_to_prop_eq(args) 6182 6183 if known_function: 6184 func_builder = t.cast(t.Callable, function) 6185 6186 if "dialect" in func_builder.__code__.co_varnames: 6187 func = func_builder(args, dialect=self.dialect) 6188 else: 6189 func = func_builder(args) 6190 6191 func = self.validate_expression(func, args) 6192 if self.dialect.PRESERVE_ORIGINAL_NAMES: 6193 func.meta["name"] = this 6194 6195 this = func 6196 else: 6197 if token_type == TokenType.IDENTIFIER: 6198 this = exp.Identifier(this=this, quoted=True).update_positions(token) 6199 6200 this = self.expression(exp.Anonymous, this=this, expressions=args) 6201 6202 this = this.update_positions(token) 6203 6204 if isinstance(this, exp.Expression): 6205 this.add_comments(comments) 6206 6207 self._match_r_paren(this) 6208 return self._parse_window(this) 6209 6210 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 6211 return expression 6212 6213 def _kv_to_prop_eq( 6214 self, expressions: t.List[exp.Expression], parse_map: bool = False 6215 ) -> t.List[exp.Expression]: 6216 transformed = [] 6217 6218 for index, e in enumerate(expressions): 6219 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6220 if isinstance(e, exp.Alias): 6221 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 6222 6223 if not isinstance(e, exp.PropertyEQ): 6224 e = self.expression( 6225 exp.PropertyEQ, 6226 this=e.this if parse_map else exp.to_identifier(e.this.name), 6227 expression=e.expression, 6228 ) 6229 6230 
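# A hedged doctest-style sketch of this key/value normalization; it assumes a
# stock sqlglot install. STRUCT is one of the FUNCTIONS_WITH_ALIASED_ARGS, so its
# aliased arguments should come back rewritten as exp.PropertyEQ nodes:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> struct = sqlglot.parse_one("SELECT STRUCT(1 AS a)").selects[0]
#     >>> isinstance(struct.expressions[0], exp.PropertyEQ)
#     True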
if isinstance(e.this, exp.Column): 6231 e.this.replace(e.this.this) 6232 else: 6233 e = self._to_prop_eq(e, index) 6234 6235 transformed.append(e) 6236 6237 return transformed 6238 6239 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6240 return self._parse_statement() 6241 6242 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6243 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6244 6245 def _parse_user_defined_function( 6246 self, kind: t.Optional[TokenType] = None 6247 ) -> t.Optional[exp.Expression]: 6248 this = self._parse_table_parts(schema=True) 6249 6250 if not self._match(TokenType.L_PAREN): 6251 return this 6252 6253 expressions = self._parse_csv(self._parse_function_parameter) 6254 self._match_r_paren() 6255 return self.expression( 6256 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6257 ) 6258 6259 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6260 literal = self._parse_primary() 6261 if literal: 6262 return self.expression(exp.Introducer, token=token, expression=literal) 6263 6264 return self._identifier_expression(token) 6265 6266 def _parse_session_parameter(self) -> exp.SessionParameter: 6267 kind = None 6268 this = self._parse_id_var() or self._parse_primary() 6269 6270 if this and self._match(TokenType.DOT): 6271 kind = this.name 6272 this = self._parse_var() or self._parse_primary() 6273 6274 return self.expression(exp.SessionParameter, this=this, kind=kind) 6275 6276 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6277 return self._parse_id_var() 6278 6279 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6280 index = self._index 6281 6282 if self._match(TokenType.L_PAREN): 6283 expressions = t.cast( 6284 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6285 ) 6286 6287 if not self._match(TokenType.R_PAREN): 6288 self._retreat(index) 6289 else: 6290 expressions = [self._parse_lambda_arg()] 6291 6292 if self._match_set(self.LAMBDAS): 6293 return self.LAMBDAS[self._prev.token_type](self, expressions) 6294 6295 self._retreat(index) 6296 6297 this: t.Optional[exp.Expression] 6298 6299 if self._match(TokenType.DISTINCT): 6300 this = self.expression( 6301 exp.Distinct, expressions=self._parse_csv(self._parse_disjunction) 6302 ) 6303 else: 6304 this = self._parse_select_or_expression(alias=alias) 6305 6306 return self._parse_limit( 6307 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6308 ) 6309 6310 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6311 index = self._index 6312 if not self._match(TokenType.L_PAREN): 6313 return this 6314 6315 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6316 # expr can be of both types 6317 if self._match_set(self.SELECT_START_TOKENS): 6318 self._retreat(index) 6319 return this 6320 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6321 self._match_r_paren() 6322 return self.expression(exp.Schema, this=this, expressions=args) 6323 6324 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6325 return self._parse_column_def(self._parse_field(any_token=True)) 6326 6327 def _parse_column_def( 6328 self, this: t.Optional[exp.Expression], computed_column: bool = True 6329 ) -> t.Optional[exp.Expression]: 6330 # column defs are not really columns, they're identifiers 6331 if isinstance(this, exp.Column): 6332 this = this.this 6333 6334 if not computed_column: 6335 self._match(TokenType.ALIAS) 6336 6337 kind = self._parse_types(schema=True) 6338 6339 if self._match_text_seq("FOR", "ORDINALITY"): 6340 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6341 6342 constraints: t.List[exp.Expression] = [] 6343 6344 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6345 ("ALIAS", "MATERIALIZED") 6346 ): 6347 persisted = self._prev.text.upper() == "MATERIALIZED" 6348 constraint_kind = exp.ComputedColumnConstraint( 6349 this=self._parse_disjunction(), 6350 persisted=persisted or self._match_text_seq("PERSISTED"), 6351 data_type=exp.Var(this="AUTO") 6352 if self._match_text_seq("AUTO") 6353 else self._parse_types(), 6354 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6355 ) 6356 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6357 elif not kind and self._match_set({TokenType.IN, TokenType.OUT}, advance=False): 6358 in_out_constraint = self.expression( 6359 exp.InOutColumnConstraint, 6360 input_=self._match(TokenType.IN), 6361 output=self._match(TokenType.OUT), 6362 ) 6363 constraints.append(in_out_constraint) 6364 kind = self._parse_types() 6365 elif ( 6366 kind 6367 and self._match(TokenType.ALIAS, advance=False) 6368 and ( 6369 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6370 or (self._next and self._next.token_type == TokenType.L_PAREN) 6371 ) 6372 ): 6373 self._advance() 6374 constraints.append( 6375 self.expression( 6376 exp.ColumnConstraint, 6377 kind=exp.ComputedColumnConstraint( 6378 this=self._parse_disjunction(), 6379 persisted=self._match_texts(("STORED", "VIRTUAL")) 6380 and self._prev.text.upper() == "STORED", 6381 ), 6382 ) 6383 ) 6384 6385 while True: 6386 constraint = self._parse_column_constraint() 6387 if not constraint: 6388 break 6389 constraints.append(constraint) 6390 6391 if not kind and not constraints: 6392 return this 6393 6394 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6395 6396 def _parse_auto_increment( 6397 self, 6398 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6399 start = None 6400 increment = None 6401 order = None 6402 6403 if self._match(TokenType.L_PAREN, advance=False): 6404 args = self._parse_wrapped_csv(self._parse_bitwise) 6405 start = seq_get(args, 0) 6406 increment = seq_get(args, 1) 6407 elif self._match_text_seq("START"): 6408 start = self._parse_bitwise() 6409 self._match_text_seq("INCREMENT") 6410 increment = self._parse_bitwise() 6411 if self._match_text_seq("ORDER"): 6412 order = True 6413 elif self._match_text_seq("NOORDER"): 6414 order = False 6415 6416 if start and increment: 6417 return exp.GeneratedAsIdentityColumnConstraint( 6418 start=start, increment=increment, this=False, 
order=order 6419 ) 6420 6421 return exp.AutoIncrementColumnConstraint() 6422 6423 def _parse_check_constraint(self) -> t.Optional[exp.CheckColumnConstraint]: 6424 if not self._match(TokenType.L_PAREN, advance=False): 6425 return None 6426 6427 return self.expression( 6428 exp.CheckColumnConstraint, 6429 this=self._parse_wrapped(self._parse_assignment), 6430 enforced=self._match_text_seq("ENFORCED"), 6431 ) 6432 6433 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6434 if not self._match_text_seq("REFRESH"): 6435 self._retreat(self._index - 1) 6436 return None 6437 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6438 6439 def _parse_compress(self) -> exp.CompressColumnConstraint: 6440 if self._match(TokenType.L_PAREN, advance=False): 6441 return self.expression( 6442 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6443 ) 6444 6445 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6446 6447 def _parse_generated_as_identity( 6448 self, 6449 ) -> ( 6450 exp.GeneratedAsIdentityColumnConstraint 6451 | exp.ComputedColumnConstraint 6452 | exp.GeneratedAsRowColumnConstraint 6453 ): 6454 if self._match_text_seq("BY", "DEFAULT"): 6455 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6456 this = self.expression( 6457 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6458 ) 6459 else: 6460 self._match_text_seq("ALWAYS") 6461 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6462 6463 self._match(TokenType.ALIAS) 6464 6465 if self._match_text_seq("ROW"): 6466 start = self._match_text_seq("START") 6467 if not start: 6468 self._match(TokenType.END) 6469 hidden = self._match_text_seq("HIDDEN") 6470 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6471 6472 identity = self._match_text_seq("IDENTITY") 6473 6474 if self._match(TokenType.L_PAREN): 6475 if self._match(TokenType.START_WITH): 6476 this.set("start", self._parse_bitwise()) 6477 if self._match_text_seq("INCREMENT", "BY"): 6478 this.set("increment", self._parse_bitwise()) 6479 if self._match_text_seq("MINVALUE"): 6480 this.set("minvalue", self._parse_bitwise()) 6481 if self._match_text_seq("MAXVALUE"): 6482 this.set("maxvalue", self._parse_bitwise()) 6483 6484 if self._match_text_seq("CYCLE"): 6485 this.set("cycle", True) 6486 elif self._match_text_seq("NO", "CYCLE"): 6487 this.set("cycle", False) 6488 6489 if not identity: 6490 this.set("expression", self._parse_range()) 6491 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6492 args = self._parse_csv(self._parse_bitwise) 6493 this.set("start", seq_get(args, 0)) 6494 this.set("increment", seq_get(args, 1)) 6495 6496 self._match_r_paren() 6497 6498 return this 6499 6500 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6501 self._match_text_seq("LENGTH") 6502 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6503 6504 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6505 if self._match_text_seq("NULL"): 6506 return self.expression(exp.NotNullColumnConstraint) 6507 if self._match_text_seq("CASESPECIFIC"): 6508 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6509 if self._match_text_seq("FOR", "REPLICATION"): 6510 return self.expression(exp.NotForReplicationColumnConstraint) 6511 6512 # Unconsume the `NOT` token 6513 self._retreat(self._index - 1) 6514 return None 6515 6516 def 
_parse_column_constraint(self) -> t.Optional[exp.Expression]: 6517 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6518 6519 procedure_option_follows = ( 6520 self._match(TokenType.WITH, advance=False) 6521 and self._next 6522 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6523 ) 6524 6525 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6526 constraint = self.CONSTRAINT_PARSERS[self._prev.text.upper()](self) 6527 if not constraint: 6528 self._retreat(self._index - 1) 6529 return None 6530 6531 return self.expression(exp.ColumnConstraint, this=this, kind=constraint) 6532 6533 return this 6534 6535 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6536 if not self._match(TokenType.CONSTRAINT): 6537 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6538 6539 return self.expression( 6540 exp.Constraint, 6541 this=self._parse_id_var(), 6542 expressions=self._parse_unnamed_constraints(), 6543 ) 6544 6545 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6546 constraints = [] 6547 while True: 6548 constraint = self._parse_unnamed_constraint() or self._parse_function() 6549 if not constraint: 6550 break 6551 constraints.append(constraint) 6552 6553 return constraints 6554 6555 def _parse_unnamed_constraint( 6556 self, constraints: t.Optional[t.Collection[str]] = None 6557 ) -> t.Optional[exp.Expression]: 6558 index = self._index 6559 6560 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6561 constraints or self.CONSTRAINT_PARSERS 6562 ): 6563 return None 6564 6565 constraint = self._prev.text.upper() 6566 if constraint not in self.CONSTRAINT_PARSERS: 6567 self.raise_error(f"No parser found for schema constraint {constraint}.") 6568 6569 constraint = self.CONSTRAINT_PARSERS[constraint](self) 6570 if not constraint: 6571 self._retreat(index) 6572 6573 return constraint 6574 6575 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6576 return self._parse_id_var(any_token=False) 6577 6578 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6579 self._match_texts(("KEY", "INDEX")) 6580 return self.expression( 6581 exp.UniqueColumnConstraint, 6582 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6583 this=self._parse_schema(self._parse_unique_key()), 6584 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6585 on_conflict=self._parse_on_conflict(), 6586 options=self._parse_key_constraint_options(), 6587 ) 6588 6589 def _parse_key_constraint_options(self) -> t.List[str]: 6590 options = [] 6591 while True: 6592 if not self._curr: 6593 break 6594 6595 if self._match(TokenType.ON): 6596 action = None 6597 on = self._advance_any() and self._prev.text 6598 6599 if self._match_text_seq("NO", "ACTION"): 6600 action = "NO ACTION" 6601 elif self._match_text_seq("CASCADE"): 6602 action = "CASCADE" 6603 elif self._match_text_seq("RESTRICT"): 6604 action = "RESTRICT" 6605 elif self._match_pair(TokenType.SET, TokenType.NULL): 6606 action = "SET NULL" 6607 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6608 action = "SET DEFAULT" 6609 else: 6610 self.raise_error("Invalid key constraint") 6611 6612 options.append(f"ON {on} {action}") 6613 else: 6614 var = self._parse_var_from_options( 6615 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6616 ) 6617 if not var: 6618 break 6619 options.append(var.name) 6620 6621 return options 6622 6623 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6624 
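        # Added note (not in upstream sqlglot): parses the tail of a foreign-key
        # definition such as "REFERENCES other_table (col) ON DELETE CASCADE" into an
        # exp.Reference. With match=False, the REFERENCES keyword is not required here,
        # so callers that have already handled it can reuse this method.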
if match and not self._match(TokenType.REFERENCES): 6625 return None 6626 6627 expressions = None 6628 this = self._parse_table(schema=True) 6629 options = self._parse_key_constraint_options() 6630 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6631 6632 def _parse_foreign_key(self) -> exp.ForeignKey: 6633 expressions = ( 6634 self._parse_wrapped_id_vars() 6635 if not self._match(TokenType.REFERENCES, advance=False) 6636 else None 6637 ) 6638 reference = self._parse_references() 6639 on_options = {} 6640 6641 while self._match(TokenType.ON): 6642 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6643 self.raise_error("Expected DELETE or UPDATE") 6644 6645 kind = self._prev.text.lower() 6646 6647 if self._match_text_seq("NO", "ACTION"): 6648 action = "NO ACTION" 6649 elif self._match(TokenType.SET): 6650 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6651 action = "SET " + self._prev.text.upper() 6652 else: 6653 self._advance() 6654 action = self._prev.text.upper() 6655 6656 on_options[kind] = action 6657 6658 return self.expression( 6659 exp.ForeignKey, 6660 expressions=expressions, 6661 reference=reference, 6662 options=self._parse_key_constraint_options(), 6663 **on_options, # type: ignore 6664 ) 6665 6666 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6667 return self._parse_field() 6668 6669 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6670 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6671 self._retreat(self._index - 1) 6672 return None 6673 6674 id_vars = self._parse_wrapped_id_vars() 6675 return self.expression( 6676 exp.PeriodForSystemTimeConstraint, 6677 this=seq_get(id_vars, 0), 6678 expression=seq_get(id_vars, 1), 6679 ) 6680 6681 def _parse_primary_key( 6682 self, wrapped_optional: bool = False, in_props: bool = False 6683 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6684 desc = ( 6685 self._match_set((TokenType.ASC, TokenType.DESC)) 6686 and self._prev.token_type == TokenType.DESC 6687 ) 6688 6689 this = None 6690 if ( 6691 self._curr.text.upper() not in self.CONSTRAINT_PARSERS 6692 and self._next 6693 and self._next.token_type == TokenType.L_PAREN 6694 ): 6695 this = self._parse_id_var() 6696 6697 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6698 return self.expression( 6699 exp.PrimaryKeyColumnConstraint, 6700 desc=desc, 6701 options=self._parse_key_constraint_options(), 6702 ) 6703 6704 expressions = self._parse_wrapped_csv( 6705 self._parse_primary_key_part, optional=wrapped_optional 6706 ) 6707 6708 return self.expression( 6709 exp.PrimaryKey, 6710 this=this, 6711 expressions=expressions, 6712 include=self._parse_index_params(), 6713 options=self._parse_key_constraint_options(), 6714 ) 6715 6716 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6717 return self._parse_slice(self._parse_alias(self._parse_disjunction(), explicit=True)) 6718 6719 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6720 """ 6721 Parses a datetime column in ODBC format. We parse the column into the corresponding 6722 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6723 same as we did for `DATE('yyyy-mm-dd')`. 
6724 6725 Reference: 6726 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6727 """ 6728 self._match(TokenType.VAR) 6729 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6730 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6731 if not self._match(TokenType.R_BRACE): 6732 self.raise_error("Expected }") 6733 return expression 6734 6735 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6736 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6737 return this 6738 6739 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6740 map_token = seq_get(self._tokens, self._index - 2) 6741 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6742 else: 6743 parse_map = False 6744 6745 bracket_kind = self._prev.token_type 6746 if ( 6747 bracket_kind == TokenType.L_BRACE 6748 and self._curr 6749 and self._curr.token_type == TokenType.VAR 6750 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6751 ): 6752 return self._parse_odbc_datetime_literal() 6753 6754 expressions = self._parse_csv( 6755 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6756 ) 6757 6758 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6759 self.raise_error("Expected ]") 6760 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6761 self.raise_error("Expected }") 6762 6763 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6764 if bracket_kind == TokenType.L_BRACE: 6765 this = self.expression( 6766 exp.Struct, 6767 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6768 ) 6769 elif not this: 6770 this = build_array_constructor( 6771 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6772 ) 6773 else: 6774 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6775 if constructor_type: 6776 return build_array_constructor( 6777 constructor_type, 6778 args=expressions, 6779 bracket_kind=bracket_kind, 6780 dialect=self.dialect, 6781 ) 6782 6783 expressions = apply_index_offset( 6784 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6785 ) 6786 this = self.expression( 6787 exp.Bracket, 6788 this=this, 6789 expressions=expressions, 6790 comments=this.pop_comments(), 6791 ) 6792 6793 self._add_comments(this) 6794 return self._parse_bracket(this) 6795 6796 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6797 if not self._match(TokenType.COLON): 6798 return this 6799 6800 if self._match_pair(TokenType.DASH, TokenType.COLON, advance=False): 6801 self._advance() 6802 end: t.Optional[exp.Expression] = -exp.Literal.number("1") 6803 else: 6804 end = self._parse_assignment() 6805 step = self._parse_unary() if self._match(TokenType.COLON) else None 6806 return self.expression(exp.Slice, this=this, expression=end, step=step) 6807 6808 def _parse_case(self) -> t.Optional[exp.Expression]: 6809 if self._match(TokenType.DOT, advance=False): 6810 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6811 self._retreat(self._index - 1) 6812 return None 6813 6814 ifs = [] 6815 default = None 6816 6817 comments = self._prev_comments 6818 expression = self._parse_disjunction() 6819 6820 while self._match(TokenType.WHEN): 6821 this = self._parse_disjunction() 6822 self._match(TokenType.THEN) 6823 then = self._parse_disjunction() 6824 
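            # Added example (not in upstream sqlglot): each WHEN/THEN pair becomes an
            # exp.If node, so "CASE WHEN a THEN b ELSE c END" parses roughly as
            #   Case(ifs=[If(this=a, true=b)], default=c)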
ifs.append(self.expression(exp.If, this=this, true=then)) 6825 6826 if self._match(TokenType.ELSE): 6827 default = self._parse_disjunction() 6828 6829 if not self._match(TokenType.END): 6830 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6831 default = exp.column("interval") 6832 else: 6833 self.raise_error("Expected END after CASE", self._prev) 6834 6835 return self.expression( 6836 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6837 ) 6838 6839 def _parse_if(self) -> t.Optional[exp.Expression]: 6840 if self._match(TokenType.L_PAREN): 6841 args = self._parse_csv( 6842 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6843 ) 6844 this = self.validate_expression(exp.If.from_arg_list(args), args) 6845 self._match_r_paren() 6846 else: 6847 index = self._index - 1 6848 6849 if self.NO_PAREN_IF_COMMANDS and index == 0: 6850 return self._parse_as_command(self._prev) 6851 6852 condition = self._parse_disjunction() 6853 6854 if not condition: 6855 self._retreat(index) 6856 return None 6857 6858 self._match(TokenType.THEN) 6859 true = self._parse_disjunction() 6860 false = self._parse_disjunction() if self._match(TokenType.ELSE) else None 6861 self._match(TokenType.END) 6862 this = self.expression(exp.If, this=condition, true=true, false=false) 6863 6864 return this 6865 6866 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6867 if not self._match_text_seq("VALUE", "FOR"): 6868 self._retreat(self._index - 1) 6869 return None 6870 6871 return self.expression( 6872 exp.NextValueFor, 6873 this=self._parse_column(), 6874 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6875 ) 6876 6877 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6878 this = self._parse_function() or self._parse_var_or_string(upper=True) 6879 6880 if self._match(TokenType.FROM): 6881 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6882 6883 if not self._match(TokenType.COMMA): 6884 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6885 6886 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6887 6888 def _parse_gap_fill(self) -> exp.GapFill: 6889 self._match(TokenType.TABLE) 6890 this = self._parse_table() 6891 6892 self._match(TokenType.COMMA) 6893 args = [this, *self._parse_csv(self._parse_lambda)] 6894 6895 gap_fill = exp.GapFill.from_arg_list(args) 6896 return self.validate_expression(gap_fill, args) 6897 6898 def _parse_char(self) -> exp.Chr: 6899 return self.expression( 6900 exp.Chr, 6901 expressions=self._parse_csv(self._parse_assignment), 6902 charset=self._match(TokenType.USING) and self._parse_var(), 6903 ) 6904 6905 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6906 this = self._parse_disjunction() 6907 6908 if not self._match(TokenType.ALIAS): 6909 if self._match(TokenType.COMMA): 6910 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6911 6912 self.raise_error("Expected AS after CAST") 6913 6914 fmt = None 6915 to = self._parse_types() 6916 6917 default = self._match(TokenType.DEFAULT) 6918 if default: 6919 default = self._parse_bitwise() 6920 self._match_text_seq("ON", "CONVERSION", "ERROR") 6921 6922 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6923 fmt_string = self._parse_string() 6924 fmt = self._parse_at_time_zone(fmt_string) 6925 6926 if not to: 6927 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6928 if to.this in 
exp.DataType.TEMPORAL_TYPES: 6929 this = self.expression( 6930 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6931 this=this, 6932 format=exp.Literal.string( 6933 format_time( 6934 fmt_string.this if fmt_string else "", 6935 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6936 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6937 ) 6938 ), 6939 safe=safe, 6940 ) 6941 6942 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6943 this.set("zone", fmt.args["zone"]) 6944 return this 6945 elif not to: 6946 self.raise_error("Expected TYPE after CAST") 6947 elif isinstance(to, exp.Identifier): 6948 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6949 elif to.this == exp.DataType.Type.CHAR: 6950 if self._match(TokenType.CHARACTER_SET): 6951 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6952 6953 return self.build_cast( 6954 strict=strict, 6955 this=this, 6956 to=to, 6957 format=fmt, 6958 safe=safe, 6959 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6960 default=default, 6961 ) 6962 6963 def _parse_string_agg(self) -> exp.GroupConcat: 6964 if self._match(TokenType.DISTINCT): 6965 args: t.List[t.Optional[exp.Expression]] = [ 6966 self.expression(exp.Distinct, expressions=[self._parse_disjunction()]) 6967 ] 6968 if self._match(TokenType.COMMA): 6969 args.extend(self._parse_csv(self._parse_disjunction)) 6970 else: 6971 args = self._parse_csv(self._parse_disjunction) # type: ignore 6972 6973 if self._match_text_seq("ON", "OVERFLOW"): 6974 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6975 if self._match_text_seq("ERROR"): 6976 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6977 else: 6978 self._match_text_seq("TRUNCATE") 6979 on_overflow = self.expression( 6980 exp.OverflowTruncateBehavior, 6981 this=self._parse_string(), 6982 with_count=( 6983 self._match_text_seq("WITH", "COUNT") 6984 or not self._match_text_seq("WITHOUT", "COUNT") 6985 ), 6986 ) 6987 else: 6988 on_overflow = None 6989 6990 index = self._index 6991 if not self._match(TokenType.R_PAREN) and args: 6992 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6993 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6994 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6995 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6996 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6997 6998 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6999 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 7000 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
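        # Rough illustration of that canonicalization (added commentary, not upstream):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile(
        #   ...     "SELECT LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) FROM t",
        #   ...     read="oracle",
        #   ...     write="mysql",
        #   ... )
        #
        # should fold the WITHIN GROUP order into the aggregate's first argument and
        # render it GROUP_CONCAT-style for MySQL.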
7001 if not self._match_text_seq("WITHIN", "GROUP"): 7002 self._retreat(index) 7003 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 7004 7005 # The corresponding match_r_paren will be called in parse_function (caller) 7006 self._match_l_paren() 7007 7008 return self.expression( 7009 exp.GroupConcat, 7010 this=self._parse_order(this=seq_get(args, 0)), 7011 separator=seq_get(args, 1), 7012 on_overflow=on_overflow, 7013 ) 7014 7015 def _parse_convert( 7016 self, strict: bool, safe: t.Optional[bool] = None 7017 ) -> t.Optional[exp.Expression]: 7018 this = self._parse_bitwise() 7019 7020 if self._match(TokenType.USING): 7021 to: t.Optional[exp.Expression] = self.expression( 7022 exp.CharacterSet, this=self._parse_var(tokens={TokenType.BINARY}) 7023 ) 7024 elif self._match(TokenType.COMMA): 7025 to = self._parse_types() 7026 else: 7027 to = None 7028 7029 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 7030 7031 def _parse_xml_element(self) -> exp.XMLElement: 7032 if self._match_text_seq("EVALNAME"): 7033 evalname = True 7034 this = self._parse_bitwise() 7035 else: 7036 evalname = None 7037 self._match_text_seq("NAME") 7038 this = self._parse_id_var() 7039 7040 return self.expression( 7041 exp.XMLElement, 7042 this=this, 7043 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_bitwise), 7044 evalname=evalname, 7045 ) 7046 7047 def _parse_xml_table(self) -> exp.XMLTable: 7048 namespaces = None 7049 passing = None 7050 columns = None 7051 7052 if self._match_text_seq("XMLNAMESPACES", "("): 7053 namespaces = self._parse_xml_namespace() 7054 self._match_text_seq(")", ",") 7055 7056 this = self._parse_string() 7057 7058 if self._match_text_seq("PASSING"): 7059 # The BY VALUE keywords are optional and are provided for semantic clarity 7060 self._match_text_seq("BY", "VALUE") 7061 passing = self._parse_csv(self._parse_column) 7062 7063 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 7064 7065 if self._match_text_seq("COLUMNS"): 7066 columns = self._parse_csv(self._parse_field_def) 7067 7068 return self.expression( 7069 exp.XMLTable, 7070 this=this, 7071 namespaces=namespaces, 7072 passing=passing, 7073 columns=columns, 7074 by_ref=by_ref, 7075 ) 7076 7077 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 7078 namespaces = [] 7079 7080 while True: 7081 if self._match(TokenType.DEFAULT): 7082 uri = self._parse_string() 7083 else: 7084 uri = self._parse_alias(self._parse_string()) 7085 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 7086 if not self._match(TokenType.COMMA): 7087 break 7088 7089 return namespaces 7090 7091 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 7092 args = self._parse_csv(self._parse_disjunction) 7093 7094 if len(args) < 3: 7095 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 7096 7097 return self.expression(exp.DecodeCase, expressions=args) 7098 7099 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 7100 self._match_text_seq("KEY") 7101 key = self._parse_column() 7102 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 7103 self._match_text_seq("VALUE") 7104 value = self._parse_bitwise() 7105 7106 if not key and not value: 7107 return None 7108 return self.expression(exp.JSONKeyValue, this=key, expression=value) 7109 7110 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7111 if not this or not self._match_text_seq("FORMAT", "JSON"): 7112 return this 7113 
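        # Added note (not in upstream sqlglot): wraps the operand when the standard
        # SQL/JSON "FORMAT JSON" modifier follows it, e.g. the value expression in
        # JSON_OBJECT(KEY 'k' VALUE v FORMAT JSON).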
7114 return self.expression(exp.FormatJson, this=this) 7115 7116 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 7117 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 7118 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 7119 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 7120 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 7121 else: 7122 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 7123 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 7124 7125 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 7126 7127 if not empty and not error and not null: 7128 return None 7129 7130 return self.expression( 7131 exp.OnCondition, 7132 empty=empty, 7133 error=error, 7134 null=null, 7135 ) 7136 7137 def _parse_on_handling( 7138 self, on: str, *values: str 7139 ) -> t.Optional[str] | t.Optional[exp.Expression]: 7140 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 7141 for value in values: 7142 if self._match_text_seq(value, "ON", on): 7143 return f"{value} ON {on}" 7144 7145 index = self._index 7146 if self._match(TokenType.DEFAULT): 7147 default_value = self._parse_bitwise() 7148 if self._match_text_seq("ON", on): 7149 return default_value 7150 7151 self._retreat(index) 7152 7153 return None 7154 7155 @t.overload 7156 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 7157 7158 @t.overload 7159 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 7160 7161 def _parse_json_object(self, agg=False): 7162 star = self._parse_star() 7163 expressions = ( 7164 [star] 7165 if star 7166 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 7167 ) 7168 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 7169 7170 unique_keys = None 7171 if self._match_text_seq("WITH", "UNIQUE"): 7172 unique_keys = True 7173 elif self._match_text_seq("WITHOUT", "UNIQUE"): 7174 unique_keys = False 7175 7176 self._match_text_seq("KEYS") 7177 7178 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 7179 self._parse_type() 7180 ) 7181 encoding = self._match_text_seq("ENCODING") and self._parse_var() 7182 7183 return self.expression( 7184 exp.JSONObjectAgg if agg else exp.JSONObject, 7185 expressions=expressions, 7186 null_handling=null_handling, 7187 unique_keys=unique_keys, 7188 return_type=return_type, 7189 encoding=encoding, 7190 ) 7191 7192 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 7193 def _parse_json_column_def(self) -> exp.JSONColumnDef: 7194 if not self._match_text_seq("NESTED"): 7195 this = self._parse_id_var() 7196 ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY) 7197 kind = self._parse_types(allow_identifiers=False) 7198 nested = None 7199 else: 7200 this = None 7201 ordinality = None 7202 kind = None 7203 nested = True 7204 7205 path = self._match_text_seq("PATH") and self._parse_string() 7206 nested_schema = nested and self._parse_json_schema() 7207 7208 return self.expression( 7209 exp.JSONColumnDef, 7210 this=this, 7211 kind=kind, 7212 path=path, 7213 nested_schema=nested_schema, 7214 ordinality=ordinality, 7215 ) 7216 7217 def _parse_json_schema(self) -> exp.JSONSchema: 7218 self._match_text_seq("COLUMNS") 7219 return self.expression( 7220 exp.JSONSchema, 7221 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 7222
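            # Added note (not in upstream sqlglot): optional=True means the column
            # definitions may also appear without wrapping parentheses; see
            # _parse_wrapped further below.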
) 7223 7224 def _parse_json_table(self) -> exp.JSONTable: 7225 this = self._parse_format_json(self._parse_bitwise()) 7226 path = self._match(TokenType.COMMA) and self._parse_string() 7227 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 7228 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 7229 schema = self._parse_json_schema() 7230 7231 return exp.JSONTable( 7232 this=this, 7233 schema=schema, 7234 path=path, 7235 error_handling=error_handling, 7236 empty_handling=empty_handling, 7237 ) 7238 7239 def _parse_match_against(self) -> exp.MatchAgainst: 7240 if self._match_text_seq("TABLE"): 7241 # parse SingleStore MATCH(TABLE ...) syntax 7242 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 7243 expressions = [] 7244 table = self._parse_table() 7245 if table: 7246 expressions = [table] 7247 else: 7248 expressions = self._parse_csv(self._parse_column) 7249 7250 self._match_text_seq(")", "AGAINST", "(") 7251 7252 this = self._parse_string() 7253 7254 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 7255 modifier = "IN NATURAL LANGUAGE MODE" 7256 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 7257 modifier = f"{modifier} WITH QUERY EXPANSION" 7258 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 7259 modifier = "IN BOOLEAN MODE" 7260 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 7261 modifier = "WITH QUERY EXPANSION" 7262 else: 7263 modifier = None 7264 7265 return self.expression( 7266 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 7267 ) 7268 7269 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 7270 def _parse_open_json(self) -> exp.OpenJSON: 7271 this = self._parse_bitwise() 7272 path = self._match(TokenType.COMMA) and self._parse_string() 7273 7274 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 7275 this = self._parse_field(any_token=True) 7276 kind = self._parse_types() 7277 path = self._parse_string() 7278 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 7279 7280 return self.expression( 7281 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 7282 ) 7283 7284 expressions = None 7285 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7286 self._match_l_paren() 7287 expressions = self._parse_csv(_parse_open_json_column_def) 7288 7289 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 7290 7291 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7292 args = self._parse_csv(self._parse_bitwise) 7293 7294 if self._match(TokenType.IN): 7295 return self.expression( 7296 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7297 ) 7298 7299 if haystack_first: 7300 haystack = seq_get(args, 0) 7301 needle = seq_get(args, 1) 7302 else: 7303 haystack = seq_get(args, 1) 7304 needle = seq_get(args, 0) 7305 7306 return self.expression( 7307 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7308 ) 7309 7310 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7311 args = self._parse_csv(self._parse_table) 7312 return exp.JoinHint(this=func_name.upper(), expressions=args) 7313 7314 def _parse_substring(self) -> exp.Substring: 7315 # Postgres supports the form: substring(string [from int] [for int]) 7316 # (despite being undocumented, the reverse order also works) 7317 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7318 7319 args = 
t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7320 7321 start, length = None, None 7322 7323 while self._curr: 7324 if self._match(TokenType.FROM): 7325 start = self._parse_bitwise() 7326 elif self._match(TokenType.FOR): 7327 if not start: 7328 start = exp.Literal.number(1) 7329 length = self._parse_bitwise() 7330 else: 7331 break 7332 7333 if start: 7334 args.append(start) 7335 if length: 7336 args.append(length) 7337 7338 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7339 7340 def _parse_trim(self) -> exp.Trim: 7341 # https://www.w3resource.com/sql/character-functions/trim.php 7342 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7343 7344 position = None 7345 collation = None 7346 expression = None 7347 7348 if self._match_texts(self.TRIM_TYPES): 7349 position = self._prev.text.upper() 7350 7351 this = self._parse_bitwise() 7352 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7353 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7354 expression = self._parse_bitwise() 7355 7356 if invert_order: 7357 this, expression = expression, this 7358 7359 if self._match(TokenType.COLLATE): 7360 collation = self._parse_bitwise() 7361 7362 return self.expression( 7363 exp.Trim, this=this, position=position, expression=expression, collation=collation 7364 ) 7365 7366 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7367 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7368 7369 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7370 return self._parse_window(self._parse_id_var(), alias=True) 7371 7372 def _parse_respect_or_ignore_nulls( 7373 self, this: t.Optional[exp.Expression] 7374 ) -> t.Optional[exp.Expression]: 7375 if self._match_text_seq("IGNORE", "NULLS"): 7376 return self.expression(exp.IgnoreNulls, this=this) 7377 if self._match_text_seq("RESPECT", "NULLS"): 7378 return self.expression(exp.RespectNulls, this=this) 7379 return this 7380 7381 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7382 if self._match(TokenType.HAVING): 7383 self._match_texts(("MAX", "MIN")) 7384 max = self._prev.text.upper() != "MIN" 7385 return self.expression( 7386 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7387 ) 7388 7389 return this 7390 7391 def _parse_window( 7392 self, this: t.Optional[exp.Expression], alias: bool = False 7393 ) -> t.Optional[exp.Expression]: 7394 func = this 7395 comments = func.comments if isinstance(func, exp.Expression) else None 7396 7397 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7398 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7399 if self._match_text_seq("WITHIN", "GROUP"): 7400 order = self._parse_wrapped(self._parse_order) 7401 this = self.expression(exp.WithinGroup, this=this, expression=order) 7402 7403 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7404 self._match(TokenType.WHERE) 7405 this = self.expression( 7406 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7407 ) 7408 self._match_r_paren() 7409 7410 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7411 # Some dialects choose to implement and some do not. 
7412 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7413 7414 # There is some code above in _parse_lambda that handles 7415 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7416 7417 # The below changes handle 7418 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 7419 7420 # Oracle allows both formats 7421 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7422 # and Snowflake chose to do the same for familiarity 7423 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7424 if isinstance(this, exp.AggFunc): 7425 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7426 7427 if ignore_respect and ignore_respect is not this: 7428 ignore_respect.replace(ignore_respect.this) 7429 this = self.expression(ignore_respect.__class__, this=this) 7430 7431 this = self._parse_respect_or_ignore_nulls(this) 7432 7433 # bigquery select from window x AS (partition by ...) 7434 if alias: 7435 over = None 7436 self._match(TokenType.ALIAS) 7437 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7438 return this 7439 else: 7440 over = self._prev.text.upper() 7441 7442 if comments and isinstance(func, exp.Expression): 7443 func.pop_comments() 7444 7445 if not self._match(TokenType.L_PAREN): 7446 return self.expression( 7447 exp.Window, 7448 comments=comments, 7449 this=this, 7450 alias=self._parse_id_var(False), 7451 over=over, 7452 ) 7453 7454 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7455 7456 first = self._match(TokenType.FIRST) 7457 if self._match_text_seq("LAST"): 7458 first = False 7459 7460 partition, order = self._parse_partition_and_order() 7461 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7462 7463 if kind: 7464 self._match(TokenType.BETWEEN) 7465 start = self._parse_window_spec() 7466 7467 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7468 exclude = ( 7469 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7470 if self._match_text_seq("EXCLUDE") 7471 else None 7472 ) 7473 7474 spec = self.expression( 7475 exp.WindowSpec, 7476 kind=kind, 7477 start=start["value"], 7478 start_side=start["side"], 7479 end=end.get("value"), 7480 end_side=end.get("side"), 7481 exclude=exclude, 7482 ) 7483 else: 7484 spec = None 7485 7486 self._match_r_paren() 7487 7488 window = self.expression( 7489 exp.Window, 7490 comments=comments, 7491 this=this, 7492 partition_by=partition, 7493 order=order, 7494 spec=spec, 7495 alias=window_alias, 7496 over=over, 7497 first=first, 7498 ) 7499 7500 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
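        # Added example (not in upstream sqlglot): for
        #   MAX(x) KEEP (DENSE_RANK FIRST ORDER BY y) OVER (PARTITION BY z)
        # the KEEP (...) part is parsed into the first exp.Window, and the recursive
        # call below wraps it in a second exp.Window for the trailing OVER (...).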
7501 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7502 return self._parse_window(window, alias=alias) 7503 7504 return window 7505 7506 def _parse_partition_and_order( 7507 self, 7508 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7509 return self._parse_partition_by(), self._parse_order() 7510 7511 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7512 self._match(TokenType.BETWEEN) 7513 7514 return { 7515 "value": ( 7516 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7517 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7518 or self._parse_bitwise() 7519 ), 7520 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7521 } 7522 7523 def _parse_alias( 7524 self, this: t.Optional[exp.Expression], explicit: bool = False 7525 ) -> t.Optional[exp.Expression]: 7526 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7527 # so this section tries to parse the clause version and if it fails, it treats the token 7528 # as an identifier (alias) 7529 if self._can_parse_limit_or_offset(): 7530 return this 7531 7532 any_token = self._match(TokenType.ALIAS) 7533 comments = self._prev_comments or [] 7534 7535 if explicit and not any_token: 7536 return this 7537 7538 if self._match(TokenType.L_PAREN): 7539 aliases = self.expression( 7540 exp.Aliases, 7541 comments=comments, 7542 this=this, 7543 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7544 ) 7545 self._match_r_paren(aliases) 7546 return aliases 7547 7548 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7549 self.STRING_ALIASES and self._parse_string_as_identifier() 7550 ) 7551 7552 if alias: 7553 comments.extend(alias.pop_comments()) 7554 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7555 column = this.this 7556 7557 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7558 if not this.comments and column and column.comments: 7559 this.comments = column.pop_comments() 7560 7561 return this 7562 7563 def _parse_id_var( 7564 self, 7565 any_token: bool = True, 7566 tokens: t.Optional[t.Collection[TokenType]] = None, 7567 ) -> t.Optional[exp.Expression]: 7568 expression = self._parse_identifier() 7569 if not expression and ( 7570 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7571 ): 7572 quoted = self._prev.token_type == TokenType.STRING 7573 expression = self._identifier_expression(quoted=quoted) 7574 7575 return expression 7576 7577 def _parse_string(self) -> t.Optional[exp.Expression]: 7578 if self._match_set(self.STRING_PARSERS): 7579 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7580 return self._parse_placeholder() 7581 7582 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7583 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7584 if output: 7585 output.update_positions(self._prev) 7586 return output 7587 7588 def _parse_number(self) -> t.Optional[exp.Expression]: 7589 if self._match_set(self.NUMERIC_PARSERS): 7590 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7591 return self._parse_placeholder() 7592 7593 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7594 if self._match(TokenType.IDENTIFIER): 7595 return self._identifier_expression(quoted=True) 7596 return self._parse_placeholder() 7597 7598 def _parse_var( 7599 self, 7600 any_token: bool = False, 7601 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7602 upper: bool = False, 7603 ) -> t.Optional[exp.Expression]: 7604 if ( 7605 (any_token and self._advance_any()) 7606 or self._match(TokenType.VAR) 7607 or (self._match_set(tokens) if tokens else False) 7608 ): 7609 return self.expression( 7610 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7611 ) 7612 return self._parse_placeholder() 7613 7614 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7615 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7616 self._advance() 7617 return self._prev 7618 return None 7619 7620 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7621 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7622 7623 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7624 return self._parse_primary() or self._parse_var(any_token=True) 7625 7626 def _parse_null(self) -> t.Optional[exp.Expression]: 7627 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7628 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7629 return self._parse_placeholder() 7630 7631 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7632 if self._match(TokenType.TRUE): 7633 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7634 if self._match(TokenType.FALSE): 7635 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7636 return self._parse_placeholder() 7637 7638 def _parse_star(self) -> t.Optional[exp.Expression]: 7639 if self._match(TokenType.STAR): 7640 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7641 return self._parse_placeholder() 7642 7643 def _parse_parameter(self) -> exp.Parameter: 7644 this = self._parse_identifier() or self._parse_primary_or_var() 7645 return self.expression(exp.Parameter, this=this) 7646 7647 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7648 if self._match_set(self.PLACEHOLDER_PARSERS): 7649 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7650 if placeholder: 7651 return placeholder 7652 self._advance(-1) 7653 return None 7654 7655 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7656 if not self._match_texts(keywords): 7657 return None 7658 if self._match(TokenType.L_PAREN, advance=False): 7659 return self._parse_wrapped_csv(self._parse_expression) 7660 7661 expression = self._parse_alias(self._parse_disjunction(), explicit=True) 7662 return [expression] if expression else None 7663 7664 def _parse_csv( 7665 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7666 ) -> t.List[exp.Expression]: 7667 parse_result = parse_method() 7668 items = [parse_result] if parse_result is not None else [] 7669 7670 while self._match(sep): 7671 self._add_comments(parse_result) 7672 parse_result = parse_method() 7673 if parse_result is not None: 7674 items.append(parse_result) 7675 7676 return items 7677 7678 def _parse_tokens( 7679 self, parse_method: t.Callable, expressions: t.Dict 7680 ) -> t.Optional[exp.Expression]: 7681 this = parse_method() 7682 7683 while self._match_set(expressions): 7684 this = self.expression( 7685 expressions[self._prev.token_type], 7686 this=this, 7687 comments=self._prev_comments, 7688 expression=parse_method(), 7689 ) 7690 7691 return this 7692 7693 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7694 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7695 7696 def 
_parse_wrapped_csv( 7697 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7698 ) -> t.List[exp.Expression]: 7699 return self._parse_wrapped( 7700 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7701 ) 7702 7703 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7704 wrapped = self._match(TokenType.L_PAREN) 7705 if not wrapped and not optional: 7706 self.raise_error("Expecting (") 7707 parse_result = parse_method() 7708 if wrapped: 7709 self._match_r_paren() 7710 return parse_result 7711 7712 def _parse_expressions(self) -> t.List[exp.Expression]: 7713 return self._parse_csv(self._parse_expression) 7714 7715 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7716 return ( 7717 self._parse_set_operations( 7718 self._parse_alias(self._parse_assignment(), explicit=True) 7719 if alias 7720 else self._parse_assignment() 7721 ) 7722 or self._parse_select() 7723 ) 7724 7725 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7726 return self._parse_query_modifiers( 7727 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7728 ) 7729 7730 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7731 this = None 7732 if self._match_texts(self.TRANSACTION_KIND): 7733 this = self._prev.text 7734 7735 self._match_texts(("TRANSACTION", "WORK")) 7736 7737 modes = [] 7738 while True: 7739 mode = [] 7740 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7741 mode.append(self._prev.text) 7742 7743 if mode: 7744 modes.append(" ".join(mode)) 7745 if not self._match(TokenType.COMMA): 7746 break 7747 7748 return self.expression(exp.Transaction, this=this, modes=modes) 7749 7750 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7751 chain = None 7752 savepoint = None 7753 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7754 7755 self._match_texts(("TRANSACTION", "WORK")) 7756 7757 if self._match_text_seq("TO"): 7758 self._match_text_seq("SAVEPOINT") 7759 savepoint = self._parse_id_var() 7760 7761 if self._match(TokenType.AND): 7762 chain = not self._match_text_seq("NO") 7763 self._match_text_seq("CHAIN") 7764 7765 if is_rollback: 7766 return self.expression(exp.Rollback, savepoint=savepoint) 7767 7768 return self.expression(exp.Commit, chain=chain) 7769 7770 def _parse_refresh(self) -> exp.Refresh | exp.Command: 7771 if self._match(TokenType.TABLE): 7772 kind = "TABLE" 7773 elif self._match_text_seq("MATERIALIZED", "VIEW"): 7774 kind = "MATERIALIZED VIEW" 7775 else: 7776 kind = "" 7777 7778 this = self._parse_string() or self._parse_table() 7779 if not kind and not isinstance(this, exp.Literal): 7780 return self._parse_as_command(self._prev) 7781 7782 return self.expression(exp.Refresh, this=this, kind=kind) 7783 7784 def _parse_column_def_with_exists(self): 7785 start = self._index 7786 self._match(TokenType.COLUMN) 7787 7788 exists_column = self._parse_exists(not_=True) 7789 expression = self._parse_field_def() 7790 7791 if not isinstance(expression, exp.ColumnDef): 7792 self._retreat(start) 7793 return None 7794 7795 expression.set("exists", exists_column) 7796 7797 return expression 7798 7799 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7800 if not self._prev.text.upper() == "ADD": 7801 return None 7802 7803 expression = self._parse_column_def_with_exists() 7804 if not expression: 7805 return None 7806 7807 # 
https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7808 if self._match_texts(("FIRST", "AFTER")): 7809 position = self._prev.text 7810 column_position = self.expression( 7811 exp.ColumnPosition, this=self._parse_column(), position=position 7812 ) 7813 expression.set("position", column_position) 7814 7815 return expression 7816 7817 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7818 drop = self._match(TokenType.DROP) and self._parse_drop() 7819 if drop and not isinstance(drop, exp.Command): 7820 drop.set("kind", drop.args.get("kind", "COLUMN")) 7821 return drop 7822 7823 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7824 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7825 return self.expression( 7826 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7827 ) 7828 7829 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7830 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7831 self._match_text_seq("ADD") 7832 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7833 return self.expression( 7834 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7835 ) 7836 7837 column_def = self._parse_add_column() 7838 if isinstance(column_def, exp.ColumnDef): 7839 return column_def 7840 7841 exists = self._parse_exists(not_=True) 7842 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7843 return self.expression( 7844 exp.AddPartition, 7845 exists=exists, 7846 this=self._parse_field(any_token=True), 7847 location=self._match_text_seq("LOCATION", advance=False) 7848 and self._parse_property(), 7849 ) 7850 7851 return None 7852 7853 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7854 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7855 or self._match_text_seq("COLUMNS") 7856 ): 7857 schema = self._parse_schema() 7858 7859 return ( 7860 ensure_list(schema) 7861 if schema 7862 else self._parse_csv(self._parse_column_def_with_exists) 7863 ) 7864 7865 return self._parse_csv(_parse_add_alteration) 7866 7867 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7868 if self._match_texts(self.ALTER_ALTER_PARSERS): 7869 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7870 7871 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7872 # keyword after ALTER we default to parsing this statement 7873 self._match(TokenType.COLUMN) 7874 column = self._parse_field(any_token=True) 7875 7876 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7877 return self.expression(exp.AlterColumn, this=column, drop=True) 7878 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7879 return self.expression(exp.AlterColumn, this=column, default=self._parse_disjunction()) 7880 if self._match(TokenType.COMMENT): 7881 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7882 if self._match_text_seq("DROP", "NOT", "NULL"): 7883 return self.expression( 7884 exp.AlterColumn, 7885 this=column, 7886 drop=True, 7887 allow_null=True, 7888 ) 7889 if self._match_text_seq("SET", "NOT", "NULL"): 7890 return self.expression( 7891 exp.AlterColumn, 7892 this=column, 7893 allow_null=False, 7894 ) 7895 7896 if self._match_text_seq("SET", "VISIBLE"): 7897 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7898 if self._match_text_seq("SET", "INVISIBLE"): 7899 return 
self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7900 7901 self._match_text_seq("SET", "DATA") 7902 self._match_text_seq("TYPE") 7903 return self.expression( 7904 exp.AlterColumn, 7905 this=column, 7906 dtype=self._parse_types(), 7907 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7908 using=self._match(TokenType.USING) and self._parse_disjunction(), 7909 ) 7910 7911 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7912 if self._match_texts(("ALL", "EVEN", "AUTO")): 7913 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7914 7915 self._match_text_seq("KEY", "DISTKEY") 7916 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7917 7918 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7919 if compound: 7920 self._match_text_seq("SORTKEY") 7921 7922 if self._match(TokenType.L_PAREN, advance=False): 7923 return self.expression( 7924 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7925 ) 7926 7927 self._match_texts(("AUTO", "NONE")) 7928 return self.expression( 7929 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7930 ) 7931 7932 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7933 index = self._index - 1 7934 7935 partition_exists = self._parse_exists() 7936 if self._match(TokenType.PARTITION, advance=False): 7937 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7938 7939 self._retreat(index) 7940 return self._parse_csv(self._parse_drop_column) 7941 7942 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7943 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7944 exists = self._parse_exists() 7945 old_column = self._parse_column() 7946 to = self._match_text_seq("TO") 7947 new_column = self._parse_column() 7948 7949 if old_column is None or to is None or new_column is None: 7950 return None 7951 7952 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7953 7954 self._match_text_seq("TO") 7955 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7956 7957 def _parse_alter_table_set(self) -> exp.AlterSet: 7958 alter_set = self.expression(exp.AlterSet) 7959 7960 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7961 "TABLE", "PROPERTIES" 7962 ): 7963 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7964 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7965 alter_set.set("expressions", [self._parse_assignment()]) 7966 elif self._match_texts(("LOGGED", "UNLOGGED")): 7967 alter_set.set("option", exp.var(self._prev.text.upper())) 7968 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7969 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7970 elif self._match_text_seq("LOCATION"): 7971 alter_set.set("location", self._parse_field()) 7972 elif self._match_text_seq("ACCESS", "METHOD"): 7973 alter_set.set("access_method", self._parse_field()) 7974 elif self._match_text_seq("TABLESPACE"): 7975 alter_set.set("tablespace", self._parse_field()) 7976 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7977 alter_set.set("file_format", [self._parse_field()]) 7978 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7979 alter_set.set("file_format", self._parse_wrapped_options()) 7980 elif 
self._match_text_seq("STAGE_COPY_OPTIONS"): 7981 alter_set.set("copy_options", self._parse_wrapped_options()) 7982 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7983 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7984 else: 7985 if self._match_text_seq("SERDE"): 7986 alter_set.set("serde", self._parse_field()) 7987 7988 properties = self._parse_wrapped(self._parse_properties, optional=True) 7989 alter_set.set("expressions", [properties]) 7990 7991 return alter_set 7992 7993 def _parse_alter_session(self) -> exp.AlterSession: 7994 """Parse ALTER SESSION SET/UNSET statements.""" 7995 if self._match(TokenType.SET): 7996 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7997 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7998 7999 self._match_text_seq("UNSET") 8000 expressions = self._parse_csv( 8001 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 8002 ) 8003 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 8004 8005 def _parse_alter(self) -> exp.Alter | exp.Command: 8006 start = self._prev 8007 8008 alter_token = self._match_set(self.ALTERABLES) and self._prev 8009 if not alter_token: 8010 return self._parse_as_command(start) 8011 8012 exists = self._parse_exists() 8013 only = self._match_text_seq("ONLY") 8014 8015 if alter_token.token_type == TokenType.SESSION: 8016 this = None 8017 check = None 8018 cluster = None 8019 else: 8020 this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS) 8021 check = self._match_text_seq("WITH", "CHECK") 8022 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8023 8024 if self._next: 8025 self._advance() 8026 8027 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 8028 if parser: 8029 actions = ensure_list(parser(self)) 8030 not_valid = self._match_text_seq("NOT", "VALID") 8031 options = self._parse_csv(self._parse_property) 8032 cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE") 8033 8034 if not self._curr and actions: 8035 return self.expression( 8036 exp.Alter, 8037 this=this, 8038 kind=alter_token.text.upper(), 8039 exists=exists, 8040 actions=actions, 8041 only=only, 8042 options=options, 8043 cluster=cluster, 8044 not_valid=not_valid, 8045 check=check, 8046 cascade=cascade, 8047 ) 8048 8049 return self._parse_as_command(start) 8050 8051 def _parse_analyze(self) -> exp.Analyze | exp.Command: 8052 start = self._prev 8053 # https://duckdb.org/docs/sql/statements/analyze 8054 if not self._curr: 8055 return self.expression(exp.Analyze) 8056 8057 options = [] 8058 while self._match_texts(self.ANALYZE_STYLES): 8059 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 8060 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 8061 else: 8062 options.append(self._prev.text.upper()) 8063 8064 this: t.Optional[exp.Expression] = None 8065 inner_expression: t.Optional[exp.Expression] = None 8066 8067 kind = self._curr and self._curr.text.upper() 8068 8069 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 8070 this = self._parse_table_parts() 8071 elif self._match_text_seq("TABLES"): 8072 if self._match_set((TokenType.FROM, TokenType.IN)): 8073 kind = f"{kind} {self._prev.text.upper()}" 8074 this = self._parse_table(schema=True, is_db_reference=True) 8075 elif self._match_text_seq("DATABASE"): 8076 this = self._parse_table(schema=True, is_db_reference=True) 8077 elif 
self._match_text_seq("CLUSTER"): 8078 this = self._parse_table() 8079 # Try matching inner expr keywords before fallback to parse table. 8080 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 8081 kind = None 8082 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 8083 else: 8084 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 8085 kind = None 8086 this = self._parse_table_parts() 8087 8088 partition = self._try_parse(self._parse_partition) 8089 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 8090 return self._parse_as_command(start) 8091 8092 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 8093 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 8094 "WITH", "ASYNC", "MODE" 8095 ): 8096 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 8097 else: 8098 mode = None 8099 8100 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 8101 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 8102 8103 properties = self._parse_properties() 8104 return self.expression( 8105 exp.Analyze, 8106 kind=kind, 8107 this=this, 8108 mode=mode, 8109 partition=partition, 8110 properties=properties, 8111 expression=inner_expression, 8112 options=options, 8113 ) 8114 8115 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 8116 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 8117 this = None 8118 kind = self._prev.text.upper() 8119 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 8120 expressions = [] 8121 8122 if not self._match_text_seq("STATISTICS"): 8123 self.raise_error("Expecting token STATISTICS") 8124 8125 if self._match_text_seq("NOSCAN"): 8126 this = "NOSCAN" 8127 elif self._match(TokenType.FOR): 8128 if self._match_text_seq("ALL", "COLUMNS"): 8129 this = "FOR ALL COLUMNS" 8130 if self._match_texts("COLUMNS"): 8131 this = "FOR COLUMNS" 8132 expressions = self._parse_csv(self._parse_column_reference) 8133 elif self._match_text_seq("SAMPLE"): 8134 sample = self._parse_number() 8135 expressions = [ 8136 self.expression( 8137 exp.AnalyzeSample, 8138 sample=sample, 8139 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 8140 ) 8141 ] 8142 8143 return self.expression( 8144 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 8145 ) 8146 8147 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 8148 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 8149 kind = None 8150 this = None 8151 expression: t.Optional[exp.Expression] = None 8152 if self._match_text_seq("REF", "UPDATE"): 8153 kind = "REF" 8154 this = "UPDATE" 8155 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 8156 this = "UPDATE SET DANGLING TO NULL" 8157 elif self._match_text_seq("STRUCTURE"): 8158 kind = "STRUCTURE" 8159 if self._match_text_seq("CASCADE", "FAST"): 8160 this = "CASCADE FAST" 8161 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 8162 ("ONLINE", "OFFLINE") 8163 ): 8164 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 8165 expression = self._parse_into() 8166 8167 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 8168 8169 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 8170 this = self._prev.text.upper() 8171 if self._match_text_seq("COLUMNS"): 8172 return self.expression(exp.AnalyzeColumns, this=f"{this} 
{self._prev.text.upper()}") 8173 return None 8174 8175 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 8176 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 8177 if self._match_text_seq("STATISTICS"): 8178 return self.expression(exp.AnalyzeDelete, kind=kind) 8179 return None 8180 8181 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 8182 if self._match_text_seq("CHAINED", "ROWS"): 8183 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 8184 return None 8185 8186 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 8187 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 8188 this = self._prev.text.upper() 8189 expression: t.Optional[exp.Expression] = None 8190 expressions = [] 8191 update_options = None 8192 8193 if self._match_text_seq("HISTOGRAM", "ON"): 8194 expressions = self._parse_csv(self._parse_column_reference) 8195 with_expressions = [] 8196 while self._match(TokenType.WITH): 8197 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 8198 if self._match_texts(("SYNC", "ASYNC")): 8199 if self._match_text_seq("MODE", advance=False): 8200 with_expressions.append(f"{self._prev.text.upper()} MODE") 8201 self._advance() 8202 else: 8203 buckets = self._parse_number() 8204 if self._match_text_seq("BUCKETS"): 8205 with_expressions.append(f"{buckets} BUCKETS") 8206 if with_expressions: 8207 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 8208 8209 if self._match_texts(("MANUAL", "AUTO")) and self._match( 8210 TokenType.UPDATE, advance=False 8211 ): 8212 update_options = self._prev.text.upper() 8213 self._advance() 8214 elif self._match_text_seq("USING", "DATA"): 8215 expression = self.expression(exp.UsingData, this=self._parse_string()) 8216 8217 return self.expression( 8218 exp.AnalyzeHistogram, 8219 this=this, 8220 expressions=expressions, 8221 expression=expression, 8222 update_options=update_options, 8223 ) 8224 8225 def _parse_merge(self) -> exp.Merge: 8226 self._match(TokenType.INTO) 8227 target = self._parse_table() 8228 8229 if target and self._match(TokenType.ALIAS, advance=False): 8230 target.set("alias", self._parse_table_alias()) 8231 8232 self._match(TokenType.USING) 8233 using = self._parse_table() 8234 8235 return self.expression( 8236 exp.Merge, 8237 this=target, 8238 using=using, 8239 on=self._match(TokenType.ON) and self._parse_disjunction(), 8240 using_cond=self._match(TokenType.USING) and self._parse_using_identifiers(), 8241 whens=self._parse_when_matched(), 8242 returning=self._parse_returning(), 8243 ) 8244 8245 def _parse_when_matched(self) -> exp.Whens: 8246 whens = [] 8247 8248 while self._match(TokenType.WHEN): 8249 matched = not self._match(TokenType.NOT) 8250 self._match_text_seq("MATCHED") 8251 source = ( 8252 False 8253 if self._match_text_seq("BY", "TARGET") 8254 else self._match_text_seq("BY", "SOURCE") 8255 ) 8256 condition = self._parse_disjunction() if self._match(TokenType.AND) else None 8257 8258 self._match(TokenType.THEN) 8259 8260 if self._match(TokenType.INSERT): 8261 this = self._parse_star() 8262 if this: 8263 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 8264 else: 8265 then = self.expression( 8266 exp.Insert, 8267 this=exp.var("ROW") 8268 if self._match_text_seq("ROW") 8269 else self._parse_value(values=False), 8270 expression=self._match_text_seq("VALUES") and self._parse_value(), 8271 ) 8272 elif self._match(TokenType.UPDATE): 8273 
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS)

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            global_=global_,
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        position = self._match(TokenType.COMMA) and self._parse_column()

        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_disjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            position=position,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
self._match_text_seq("CONTINUE", "IDENTITY"): 8640 identity = "CONTINUE" 8641 else: 8642 identity = None 8643 8644 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8645 option = self._prev.text 8646 else: 8647 option = None 8648 8649 partition = self._parse_partition() 8650 8651 # Fallback case 8652 if self._curr: 8653 return self._parse_as_command(start) 8654 8655 return self.expression( 8656 exp.TruncateTable, 8657 expressions=expressions, 8658 is_database=is_database, 8659 exists=exists, 8660 cluster=cluster, 8661 identity=identity, 8662 option=option, 8663 partition=partition, 8664 ) 8665 8666 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8667 this = self._parse_ordered(self._parse_opclass) 8668 8669 if not self._match(TokenType.WITH): 8670 return this 8671 8672 op = self._parse_var(any_token=True, tokens=self.RESERVED_TOKENS) 8673 8674 return self.expression(exp.WithOperator, this=this, op=op) 8675 8676 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8677 self._match(TokenType.EQ) 8678 self._match(TokenType.L_PAREN) 8679 8680 opts: t.List[t.Optional[exp.Expression]] = [] 8681 option: exp.Expression | None 8682 while self._curr and not self._match(TokenType.R_PAREN): 8683 if self._match_text_seq("FORMAT_NAME", "="): 8684 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8685 option = self._parse_format_name() 8686 else: 8687 option = self._parse_property() 8688 8689 if option is None: 8690 self.raise_error("Unable to parse option") 8691 break 8692 8693 opts.append(option) 8694 8695 return opts 8696 8697 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8698 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8699 8700 options = [] 8701 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8702 option = self._parse_var(any_token=True) 8703 prev = self._prev.text.upper() 8704 8705 # Different dialects might separate options and values by white space, "=" and "AS" 8706 self._match(TokenType.EQ) 8707 self._match(TokenType.ALIAS) 8708 8709 param = self.expression(exp.CopyParameter, this=option) 8710 8711 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8712 TokenType.L_PAREN, advance=False 8713 ): 8714 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8715 param.set("expressions", self._parse_wrapped_options()) 8716 elif prev == "FILE_FORMAT": 8717 # T-SQL's external file format case 8718 param.set("expression", self._parse_field()) 8719 elif ( 8720 prev == "FORMAT" 8721 and self._prev.token_type == TokenType.ALIAS 8722 and self._match_texts(("AVRO", "JSON")) 8723 ): 8724 param.set("this", exp.var(f"FORMAT AS {self._prev.text.upper()}")) 8725 param.set("expression", self._parse_field()) 8726 else: 8727 param.set("expression", self._parse_unquoted_field() or self._parse_bracket()) 8728 8729 options.append(param) 8730 self._match(sep) 8731 8732 return options 8733 8734 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8735 expr = self.expression(exp.Credentials) 8736 8737 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8738 expr.set("storage", self._parse_field()) 8739 if self._match_text_seq("CREDENTIALS"): 8740 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8741 creds = ( 8742 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8743 ) 8744 expr.set("credentials", creds) 8745 if self._match_text_seq("ENCRYPTION"): 8746 expr.set("encryption", 
        if self._match_text_seq("IAM_ROLE"):
            expr.set(
                "iam_role",
                exp.var(self._prev.text) if self._match(TokenType.DEFAULT) else self._parse_field(),
            )
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        if self._match(TokenType.EQ, advance=False):
            # Backtrack one token since we've consumed the lhs of a parameter assignment here.
            # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter
            # list via `_parse_wrapped(..)` below.
            self._advance(-1)
            files = []

        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            except_=self._parse_star_op("EXCEPT", "EXCLUDE"),
            replace=self._parse_star_op("REPLACE"),
            rename=self._parse_star_op("RENAME"),
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()
        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant_revoke_common(
        self,
    ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]:
        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        return privileges, kind, securable

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_revoke(self) -> exp.Revoke | exp.Command:
        start = self._prev

        grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("FROM"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        cascade = None
        if self._match_texts(("CASCADE", "RESTRICT")):
            cascade = self._prev.text.upper()

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Revoke,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
            cascade=cascade,
        )

    def _parse_overlay(self) -> exp.Overlay:
        def _parse_overlay_arg(text: str) -> t.Optional[exp.Expression]:
            return (
                self._match(TokenType.COMMA) or self._match_text_seq(text)
            ) and self._parse_bitwise()

        return self.expression(
            exp.Overlay,
            this=self._parse_bitwise(),
            expression=_parse_overlay_arg("PLACING"),
            from_=_parse_overlay_arg("FROM"),
            for_=_parse_overlay_arg("FOR"),
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_lambda()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_function_args())

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        return self.expression(exp.Identifier, token=token or self._prev, **kwargs)

    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: t.List[exp.Expression],
        alias_cte: t.Optional[exp.TableAlias] = None,
    ) -> exp.Select:
        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
        if alias_cte:
            new_cte = alias_cte
        else:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

        with_ = query.args.get("with_")
        ctes = with_.pop() if with_ else None

        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
        if ctes:
            new_select.set("with_", ctes)

        return new_select.with_(new_cte, as_=query, copy=False)

    def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
        select = self._parse_select(consume_pipe=False)
        if not select:
            return query

        return self._build_pipe_cte(
            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
        )

    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if self._match_text_seq("GROUP", "AND", advance=False):
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with_")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with_", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from_")
        if from_:
            from_.this.set("pivots", pivots)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        sample = self._parse_table_sample()

        with_ = query.args.get("with_")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query

    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from_"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and if that fails, then try to parse a
                # join operator. If that fails as well, then the operator is not supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query

    def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        return self.expression(
            exp.DeclareItem,
            this=vars,
            kind=self._parse_types(),
            default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
        )

    def _parse_declare(self) -> exp.Declare | exp.Command:
        start = self._prev
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare, expressions=expressions)

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class, **kwargs)

    def _parse_json_value(self) -> exp.JSONValue:
        this = self._parse_bitwise()
        self._match(TokenType.COMMA)
        path = self._parse_bitwise()

        returning = self._match(TokenType.RETURNING) and self._parse_type()

        return self.expression(
            exp.JSONValue,
            this=this,
            path=self.dialect.to_json_path(path),
            returning=returning,
            on_condition=self._parse_on_condition(),
        )

    def _parse_group_concat(self) -> t.Optional[exp.Expression]:
        def concat_exprs(
            node: t.Optional[exp.Expression], exprs: t.List[exp.Expression]
        ) -> exp.Expression:
            if isinstance(node, exp.Distinct) and len(node.expressions) > 1:
                concat_exprs = [
                    self.expression(
                        exp.Concat,
                        expressions=node.expressions,
                        safe=True,
                        coalesce=self.dialect.CONCAT_COALESCE,
                    )
                ]
                node.set("expressions", concat_exprs)
                return node
            if len(exprs) == 1:
                return exprs[0]
            return self.expression(
                exp.Concat, expressions=args, safe=True, coalesce=self.dialect.CONCAT_COALESCE
            )

        args = self._parse_csv(self._parse_lambda)

        if args:
            order = args[-1] if isinstance(args[-1], exp.Order) else None

            if order:
                # Order By is the last (or only) expression in the list and has consumed the 'expr' before it,
                # remove 'expr' from exp.Order and add it back to args
                args[-1] = order.this
                order.set("this", concat_exprs(order.this, args))

            this = order or concat_exprs(args[0], args)
        else:
            this = None

        separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None

        return self.expression(exp.GroupConcat, this=this, separator=separator)

    def _parse_initcap(self) -> exp.Initcap:
        expr = exp.Initcap.from_arg_list(self._parse_function_args())

        # attach dialect's default delimiters
        if expr.args.get("expression") is None:
            expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS))

        return expr

    def _parse_operator(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while True:
            if not self._match(TokenType.L_PAREN):
                break

            op = ""
            while self._curr and not self._match(TokenType.R_PAREN):
                op += self._curr.text
                self._advance()

            this = self.expression(
                exp.Operator,
                comments=self._prev_comments,
                this=this,
                operator=op,
                expression=self._parse_bitwise(),
            )

            if not self._match(TokenType.OPERATOR):
                break

        return this
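The internal _parse_* methods above are not called directly; they are reached through the public entry points documented below. As a quick illustration of the MERGE machinery (_parse_merge and _parse_when_matched), here is a minimal sketch using the top-level API; the SQL is illustrative and the assertions describe the expected shape of the tree rather than a documented guarantee:

    import sqlglot
    from sqlglot import exp

    merge = sqlglot.parse_one(
        "MERGE INTO t USING s ON t.id = s.id "
        "WHEN MATCHED THEN UPDATE SET t.v = s.v "
        "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    )
    assert isinstance(merge, exp.Merge)
    # _parse_when_matched collects each WHEN branch into an exp.Whens node
    assert len(merge.args["whens"].expressions) == 2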
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
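A minimal construction sketch, assuming the default dialect; the error levels shown are the ones defined in sqlglot.errors:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # IMMEDIATE (the default) raises on the first problem. WARN logs
    # recorded errors instead, leaving them available on parser.errors.
    parser = Parser(error_level=ErrorLevel.WARN, error_message_context=50, max_errors=5)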
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
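For example, a hedged sketch assuming the default dialect's tokenizer; semicolons split the token stream into one tree per statement:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT b FROM u"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    assert len(trees) == 2  # one syntax tree per statement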
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
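A sketch of parsing a fragment into a specific node type; exp.From is assumed here to be one of the types registered in EXPRESSION_PARSERS, and the exact registry contents can vary by version:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "FROM some_table"
    from_clause = Parser().parse_into(exp.From, Tokenizer().tokenize(sql), sql)[0]
    assert isinstance(from_clause, exp.From)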
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
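check_errors runs at the end of an internal parse pass, so user code mostly observes its effect indirectly. A hedged sketch of the WARN behavior (the broken SQL below is illustrative):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize("SELECT 1 +"), sql="SELECT 1 +")
    # With WARN, recorded errors are logged rather than raised and
    # stay inspectable on the instance:
    print(len(parser.errors))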
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    formatted_sql, start_context, highlight, end_context = highlight_sql(
        sql=self.sql,
        positions=[(token.start, token.end)],
        context_length=self.error_message_context,
    )
    formatted_message = f"{message}. Line {token.line}, Col: {token.col}.\n {formatted_sql}"

    error = ParseError.new(
        formatted_message,
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self,
    exp_class: t.Type[E],
    token: t.Optional[Token] = None,
    comments: t.Optional[t.List[str]] = None,
    **kwargs,
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    if token:
        instance = exp_class(this=token.text, **kwargs)
        instance.update_positions(token)
    else:
        instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
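Because expression() funnels every node through validate_expression and comment attachment, dialect parsers build nodes with it rather than instantiating Expression classes directly. A hypothetical sketch (MyParser and _parse_frobnicate are invented for illustration):

    from sqlglot import exp
    from sqlglot.parser import Parser

    class MyParser(Parser):
        def _parse_frobnicate(self):  # hypothetical helper, not part of sqlglot
            # self.expression validates mandatory args and attaches any
            # pending comments to the freshly built node
            return self.expression(exp.Anonymous, this="FROBNICATE", expressions=[])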
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
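For instance, exp.Lower declares its "this" argument as required, so validating an empty instance should report a missing mandatory argument. A sketch, using the default IMMEDIATE error level:

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    try:
        Parser().validate_expression(exp.Lower())
    except ParseError as e:
        print(e)  # complains about the missing required 'this' argument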
def parse_set_operation(
    self, this: t.Optional[exp.Expression], consume_pipe: bool = False
) -> t.Optional[exp.Expression]:
    start = self._index
    _, side_token, kind_token = self._parse_join_parts()

    side = side_token.text if side_token else None
    kind = kind_token.text if kind_token else None

    if not self._match_set(self.SET_OPERATIONS):
        self._retreat(start)
        return None

    token_type = self._prev.token_type

    if token_type == TokenType.UNION:
        operation: t.Type[exp.SetOperation] = exp.Union
    elif token_type == TokenType.EXCEPT:
        operation = exp.Except
    else:
        operation = exp.Intersect

    comments = self._prev.comments

    if self._match(TokenType.DISTINCT):
        distinct: t.Optional[bool] = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
        "STRICT", "CORRESPONDING"
    )
    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        if not side and not kind:
            kind = "INNER"

    on_column_list = None
    if by_name and self._match_texts(("ON", "BY")):
        on_column_list = self._parse_wrapped_csv(self._parse_column)

    expression = self._parse_select(
        nested=True, parse_set_operation=False, consume_pipe=consume_pipe
    )

    return self.expression(
        operation,
        comments=comments,
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=expression,
        side=side,
        kind=kind,
        on=on_column_list,
    )
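A quick sketch of the result; whether a bare UNION defaults to DISTINCT comes from SET_OP_DISTINCT_BY_DEFAULT and is therefore dialect-specific:

    import sqlglot
    from sqlglot import exp

    union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
    assert isinstance(union, exp.Union)
    # the default dialect treats a bare UNION as DISTINCT
    assert union.args.get("distinct") is True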