# sqlglot.parser — consumes the Tokenizer's token stream and builds the expression tree.
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], reverse_args: bool = False 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 def _parse_binary_range( 47 self: Parser, this: t.Optional[exp.Expression] 48 ) -> t.Optional[exp.Expression]: 49 expression = self._parse_bitwise() 50 if reverse_args: 51 this, expression = expression, this 52 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 53 54 return _parse_binary_range 55 56 57def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 58 # Default argument order is base, expression 59 this = seq_get(args, 0) 60 expression = seq_get(args, 1) 61 
62 if expression: 63 if not dialect.LOG_BASE_FIRST: 64 this, expression = expression, this 65 return exp.Log(this=this, expression=expression) 66 67 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 68 69 70def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 73 74 75def build_lower(args: t.List) -> exp.Lower | exp.Hex: 76 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 79 80 81def build_upper(args: t.List) -> exp.Upper | exp.Hex: 82 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 85 86 87def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 88 def _builder(args: t.List, dialect: Dialect) -> E: 89 expression = expr_type( 90 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 91 ) 92 if len(args) > 2 and expr_type is exp.JSONExtract: 93 expression.set("expressions", args[2:]) 94 95 return expression 96 97 return _builder 98 99 100def build_mod(args: t.List) -> exp.Mod: 101 this = seq_get(args, 0) 102 expression = seq_get(args, 1) 103 104 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 105 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 106 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 107 108 return exp.Mod(this=this, expression=expression) 109 110 111def build_pad(args: t.List, is_left: bool = True): 112 return exp.Pad( 113 this=seq_get(args, 0), 114 expression=seq_get(args, 1), 115 fill_pattern=seq_get(args, 2), 116 is_left=is_left, 117 ) 118 119 120class _Parser(type): 121 def __new__(cls, clsname, bases, attrs): 122 klass = super().__new__(cls, clsname, bases, attrs) 123 124 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 125 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 126 127 return klass 128 129 130class Parser(metaclass=_Parser): 131 """ 132 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 133 134 Args: 135 error_level: The desired error level. 136 Default: ErrorLevel.IMMEDIATE 137 error_message_context: The amount of context to capture from a query string when displaying 138 the error message (in number of characters). 139 Default: 100 140 max_errors: Maximum number of error messages to include in a raised ParseError. 141 This is only relevant if error_level is ErrorLevel.RAISE. 
142 Default: 3 143 """ 144 145 FUNCTIONS: t.Dict[str, t.Callable] = { 146 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 147 "CONCAT": lambda args, dialect: exp.Concat( 148 expressions=args, 149 safe=not dialect.STRICT_STRING_CONCAT, 150 coalesce=dialect.CONCAT_COALESCE, 151 ), 152 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 153 expressions=args, 154 safe=not dialect.STRICT_STRING_CONCAT, 155 coalesce=dialect.CONCAT_COALESCE, 156 ), 157 "DATE_TO_DATE_STR": lambda args: exp.Cast( 158 this=seq_get(args, 0), 159 to=exp.DataType(this=exp.DataType.Type.TEXT), 160 ), 161 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 162 "HEX": build_hex, 163 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 164 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 165 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 166 "LIKE": build_like, 167 "LOG": build_logarithm, 168 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 169 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 170 "LOWER": build_lower, 171 "LPAD": lambda args: build_pad(args), 172 "LEFTPAD": lambda args: build_pad(args), 173 "MOD": build_mod, 174 "RPAD": lambda args: build_pad(args, is_left=False), 175 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 176 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 177 if len(args) != 2 178 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 179 "TIME_TO_TIME_STR": lambda args: exp.Cast( 180 this=seq_get(args, 0), 181 to=exp.DataType(this=exp.DataType.Type.TEXT), 182 ), 183 "TO_HEX": build_hex, 184 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 185 this=exp.Cast( 186 this=seq_get(args, 0), 187 to=exp.DataType(this=exp.DataType.Type.TEXT), 188 ), 189 start=exp.Literal.number(1), 190 
length=exp.Literal.number(10), 191 ), 192 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 193 "UPPER": build_upper, 194 "VAR_MAP": build_var_map, 195 } 196 197 NO_PAREN_FUNCTIONS = { 198 TokenType.CURRENT_DATE: exp.CurrentDate, 199 TokenType.CURRENT_DATETIME: exp.CurrentDate, 200 TokenType.CURRENT_TIME: exp.CurrentTime, 201 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 202 TokenType.CURRENT_USER: exp.CurrentUser, 203 } 204 205 STRUCT_TYPE_TOKENS = { 206 TokenType.NESTED, 207 TokenType.OBJECT, 208 TokenType.STRUCT, 209 } 210 211 NESTED_TYPE_TOKENS = { 212 TokenType.ARRAY, 213 TokenType.LIST, 214 TokenType.LOWCARDINALITY, 215 TokenType.MAP, 216 TokenType.NULLABLE, 217 *STRUCT_TYPE_TOKENS, 218 } 219 220 ENUM_TYPE_TOKENS = { 221 TokenType.ENUM, 222 TokenType.ENUM8, 223 TokenType.ENUM16, 224 } 225 226 AGGREGATE_TYPE_TOKENS = { 227 TokenType.AGGREGATEFUNCTION, 228 TokenType.SIMPLEAGGREGATEFUNCTION, 229 } 230 231 TYPE_TOKENS = { 232 TokenType.BIT, 233 TokenType.BOOLEAN, 234 TokenType.TINYINT, 235 TokenType.UTINYINT, 236 TokenType.SMALLINT, 237 TokenType.USMALLINT, 238 TokenType.INT, 239 TokenType.UINT, 240 TokenType.BIGINT, 241 TokenType.UBIGINT, 242 TokenType.INT128, 243 TokenType.UINT128, 244 TokenType.INT256, 245 TokenType.UINT256, 246 TokenType.MEDIUMINT, 247 TokenType.UMEDIUMINT, 248 TokenType.FIXEDSTRING, 249 TokenType.FLOAT, 250 TokenType.DOUBLE, 251 TokenType.CHAR, 252 TokenType.NCHAR, 253 TokenType.VARCHAR, 254 TokenType.NVARCHAR, 255 TokenType.BPCHAR, 256 TokenType.TEXT, 257 TokenType.MEDIUMTEXT, 258 TokenType.LONGTEXT, 259 TokenType.MEDIUMBLOB, 260 TokenType.LONGBLOB, 261 TokenType.BINARY, 262 TokenType.VARBINARY, 263 TokenType.JSON, 264 TokenType.JSONB, 265 TokenType.INTERVAL, 266 TokenType.TINYBLOB, 267 TokenType.TINYTEXT, 268 TokenType.TIME, 269 TokenType.TIMETZ, 270 TokenType.TIMESTAMP, 271 TokenType.TIMESTAMP_S, 272 TokenType.TIMESTAMP_MS, 273 TokenType.TIMESTAMP_NS, 274 TokenType.TIMESTAMPTZ, 275 TokenType.TIMESTAMPLTZ, 
276 TokenType.TIMESTAMPNTZ, 277 TokenType.DATETIME, 278 TokenType.DATETIME64, 279 TokenType.DATE, 280 TokenType.DATE32, 281 TokenType.INT4RANGE, 282 TokenType.INT4MULTIRANGE, 283 TokenType.INT8RANGE, 284 TokenType.INT8MULTIRANGE, 285 TokenType.NUMRANGE, 286 TokenType.NUMMULTIRANGE, 287 TokenType.TSRANGE, 288 TokenType.TSMULTIRANGE, 289 TokenType.TSTZRANGE, 290 TokenType.TSTZMULTIRANGE, 291 TokenType.DATERANGE, 292 TokenType.DATEMULTIRANGE, 293 TokenType.DECIMAL, 294 TokenType.UDECIMAL, 295 TokenType.BIGDECIMAL, 296 TokenType.UUID, 297 TokenType.GEOGRAPHY, 298 TokenType.GEOMETRY, 299 TokenType.HLLSKETCH, 300 TokenType.HSTORE, 301 TokenType.PSEUDO_TYPE, 302 TokenType.SUPER, 303 TokenType.SERIAL, 304 TokenType.SMALLSERIAL, 305 TokenType.BIGSERIAL, 306 TokenType.XML, 307 TokenType.YEAR, 308 TokenType.UNIQUEIDENTIFIER, 309 TokenType.USERDEFINED, 310 TokenType.MONEY, 311 TokenType.SMALLMONEY, 312 TokenType.ROWVERSION, 313 TokenType.IMAGE, 314 TokenType.VARIANT, 315 TokenType.VECTOR, 316 TokenType.OBJECT, 317 TokenType.OBJECT_IDENTIFIER, 318 TokenType.INET, 319 TokenType.IPADDRESS, 320 TokenType.IPPREFIX, 321 TokenType.IPV4, 322 TokenType.IPV6, 323 TokenType.UNKNOWN, 324 TokenType.NULL, 325 TokenType.NAME, 326 TokenType.TDIGEST, 327 *ENUM_TYPE_TOKENS, 328 *NESTED_TYPE_TOKENS, 329 *AGGREGATE_TYPE_TOKENS, 330 } 331 332 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 333 TokenType.BIGINT: TokenType.UBIGINT, 334 TokenType.INT: TokenType.UINT, 335 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 336 TokenType.SMALLINT: TokenType.USMALLINT, 337 TokenType.TINYINT: TokenType.UTINYINT, 338 TokenType.DECIMAL: TokenType.UDECIMAL, 339 } 340 341 SUBQUERY_PREDICATES = { 342 TokenType.ANY: exp.Any, 343 TokenType.ALL: exp.All, 344 TokenType.EXISTS: exp.Exists, 345 TokenType.SOME: exp.Any, 346 } 347 348 RESERVED_TOKENS = { 349 *Tokenizer.SINGLE_TOKENS.values(), 350 TokenType.SELECT, 351 } - {TokenType.IDENTIFIER} 352 353 DB_CREATABLES = { 354 TokenType.DATABASE, 355 TokenType.DICTIONARY, 356 TokenType.MODEL, 
357 TokenType.SCHEMA, 358 TokenType.SEQUENCE, 359 TokenType.STORAGE_INTEGRATION, 360 TokenType.TABLE, 361 TokenType.TAG, 362 TokenType.VIEW, 363 TokenType.WAREHOUSE, 364 TokenType.STREAMLIT, 365 } 366 367 CREATABLES = { 368 TokenType.COLUMN, 369 TokenType.CONSTRAINT, 370 TokenType.FOREIGN_KEY, 371 TokenType.FUNCTION, 372 TokenType.INDEX, 373 TokenType.PROCEDURE, 374 *DB_CREATABLES, 375 } 376 377 # Tokens that can represent identifiers 378 ID_VAR_TOKENS = { 379 TokenType.ALL, 380 TokenType.VAR, 381 TokenType.ANTI, 382 TokenType.APPLY, 383 TokenType.ASC, 384 TokenType.ASOF, 385 TokenType.AUTO_INCREMENT, 386 TokenType.BEGIN, 387 TokenType.BPCHAR, 388 TokenType.CACHE, 389 TokenType.CASE, 390 TokenType.COLLATE, 391 TokenType.COMMAND, 392 TokenType.COMMENT, 393 TokenType.COMMIT, 394 TokenType.CONSTRAINT, 395 TokenType.COPY, 396 TokenType.DEFAULT, 397 TokenType.DELETE, 398 TokenType.DESC, 399 TokenType.DESCRIBE, 400 TokenType.DICTIONARY, 401 TokenType.DIV, 402 TokenType.END, 403 TokenType.EXECUTE, 404 TokenType.ESCAPE, 405 TokenType.FALSE, 406 TokenType.FIRST, 407 TokenType.FILTER, 408 TokenType.FINAL, 409 TokenType.FORMAT, 410 TokenType.FULL, 411 TokenType.IDENTIFIER, 412 TokenType.IS, 413 TokenType.ISNULL, 414 TokenType.INTERVAL, 415 TokenType.KEEP, 416 TokenType.KILL, 417 TokenType.LEFT, 418 TokenType.LOAD, 419 TokenType.MERGE, 420 TokenType.NATURAL, 421 TokenType.NEXT, 422 TokenType.OFFSET, 423 TokenType.OPERATOR, 424 TokenType.ORDINALITY, 425 TokenType.OVERLAPS, 426 TokenType.OVERWRITE, 427 TokenType.PARTITION, 428 TokenType.PERCENT, 429 TokenType.PIVOT, 430 TokenType.PRAGMA, 431 TokenType.RANGE, 432 TokenType.RECURSIVE, 433 TokenType.REFERENCES, 434 TokenType.REFRESH, 435 TokenType.REPLACE, 436 TokenType.RIGHT, 437 TokenType.ROLLUP, 438 TokenType.ROW, 439 TokenType.ROWS, 440 TokenType.SEMI, 441 TokenType.SET, 442 TokenType.SETTINGS, 443 TokenType.SHOW, 444 TokenType.TEMPORARY, 445 TokenType.TOP, 446 TokenType.TRUE, 447 TokenType.TRUNCATE, 448 TokenType.UNIQUE, 449 
TokenType.UNNEST, 450 TokenType.UNPIVOT, 451 TokenType.UPDATE, 452 TokenType.USE, 453 TokenType.VOLATILE, 454 TokenType.WINDOW, 455 *CREATABLES, 456 *SUBQUERY_PREDICATES, 457 *TYPE_TOKENS, 458 *NO_PAREN_FUNCTIONS, 459 } 460 461 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 462 463 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 464 TokenType.ANTI, 465 TokenType.APPLY, 466 TokenType.ASOF, 467 TokenType.FULL, 468 TokenType.LEFT, 469 TokenType.LOCK, 470 TokenType.NATURAL, 471 TokenType.OFFSET, 472 TokenType.RIGHT, 473 TokenType.SEMI, 474 TokenType.WINDOW, 475 } 476 477 ALIAS_TOKENS = ID_VAR_TOKENS 478 479 ARRAY_CONSTRUCTORS = { 480 "ARRAY": exp.Array, 481 "LIST": exp.List, 482 } 483 484 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 485 486 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 487 488 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 489 490 FUNC_TOKENS = { 491 TokenType.COLLATE, 492 TokenType.COMMAND, 493 TokenType.CURRENT_DATE, 494 TokenType.CURRENT_DATETIME, 495 TokenType.CURRENT_TIMESTAMP, 496 TokenType.CURRENT_TIME, 497 TokenType.CURRENT_USER, 498 TokenType.FILTER, 499 TokenType.FIRST, 500 TokenType.FORMAT, 501 TokenType.GLOB, 502 TokenType.IDENTIFIER, 503 TokenType.INDEX, 504 TokenType.ISNULL, 505 TokenType.ILIKE, 506 TokenType.INSERT, 507 TokenType.LIKE, 508 TokenType.MERGE, 509 TokenType.OFFSET, 510 TokenType.PRIMARY_KEY, 511 TokenType.RANGE, 512 TokenType.REPLACE, 513 TokenType.RLIKE, 514 TokenType.ROW, 515 TokenType.UNNEST, 516 TokenType.VAR, 517 TokenType.LEFT, 518 TokenType.RIGHT, 519 TokenType.SEQUENCE, 520 TokenType.DATE, 521 TokenType.DATETIME, 522 TokenType.TABLE, 523 TokenType.TIMESTAMP, 524 TokenType.TIMESTAMPTZ, 525 TokenType.TRUNCATE, 526 TokenType.WINDOW, 527 TokenType.XOR, 528 *TYPE_TOKENS, 529 *SUBQUERY_PREDICATES, 530 } 531 532 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 533 TokenType.AND: exp.And, 534 } 535 536 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 537 TokenType.COLON_EQ: 
exp.PropertyEQ, 538 } 539 540 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 541 TokenType.OR: exp.Or, 542 } 543 544 EQUALITY = { 545 TokenType.EQ: exp.EQ, 546 TokenType.NEQ: exp.NEQ, 547 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 548 } 549 550 COMPARISON = { 551 TokenType.GT: exp.GT, 552 TokenType.GTE: exp.GTE, 553 TokenType.LT: exp.LT, 554 TokenType.LTE: exp.LTE, 555 } 556 557 BITWISE = { 558 TokenType.AMP: exp.BitwiseAnd, 559 TokenType.CARET: exp.BitwiseXor, 560 TokenType.PIPE: exp.BitwiseOr, 561 } 562 563 TERM = { 564 TokenType.DASH: exp.Sub, 565 TokenType.PLUS: exp.Add, 566 TokenType.MOD: exp.Mod, 567 TokenType.COLLATE: exp.Collate, 568 } 569 570 FACTOR = { 571 TokenType.DIV: exp.IntDiv, 572 TokenType.LR_ARROW: exp.Distance, 573 TokenType.SLASH: exp.Div, 574 TokenType.STAR: exp.Mul, 575 } 576 577 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 578 579 TIMES = { 580 TokenType.TIME, 581 TokenType.TIMETZ, 582 } 583 584 TIMESTAMPS = { 585 TokenType.TIMESTAMP, 586 TokenType.TIMESTAMPTZ, 587 TokenType.TIMESTAMPLTZ, 588 *TIMES, 589 } 590 591 SET_OPERATIONS = { 592 TokenType.UNION, 593 TokenType.INTERSECT, 594 TokenType.EXCEPT, 595 } 596 597 JOIN_METHODS = { 598 TokenType.ASOF, 599 TokenType.NATURAL, 600 TokenType.POSITIONAL, 601 } 602 603 JOIN_SIDES = { 604 TokenType.LEFT, 605 TokenType.RIGHT, 606 TokenType.FULL, 607 } 608 609 JOIN_KINDS = { 610 TokenType.ANTI, 611 TokenType.CROSS, 612 TokenType.INNER, 613 TokenType.OUTER, 614 TokenType.SEMI, 615 TokenType.STRAIGHT_JOIN, 616 } 617 618 JOIN_HINTS: t.Set[str] = set() 619 620 LAMBDAS = { 621 TokenType.ARROW: lambda self, expressions: self.expression( 622 exp.Lambda, 623 this=self._replace_lambda( 624 self._parse_assignment(), 625 expressions, 626 ), 627 expressions=expressions, 628 ), 629 TokenType.FARROW: lambda self, expressions: self.expression( 630 exp.Kwarg, 631 this=exp.var(expressions[0].name), 632 expression=self._parse_assignment(), 633 ), 634 } 635 636 COLUMN_OPERATORS = { 637 TokenType.DOT: 
None, 638 TokenType.DCOLON: lambda self, this, to: self.expression( 639 exp.Cast if self.STRICT_CAST else exp.TryCast, 640 this=this, 641 to=to, 642 ), 643 TokenType.ARROW: lambda self, this, path: self.expression( 644 exp.JSONExtract, 645 this=this, 646 expression=self.dialect.to_json_path(path), 647 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 648 ), 649 TokenType.DARROW: lambda self, this, path: self.expression( 650 exp.JSONExtractScalar, 651 this=this, 652 expression=self.dialect.to_json_path(path), 653 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 654 ), 655 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 656 exp.JSONBExtract, 657 this=this, 658 expression=path, 659 ), 660 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 661 exp.JSONBExtractScalar, 662 this=this, 663 expression=path, 664 ), 665 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 666 exp.JSONBContains, 667 this=this, 668 expression=key, 669 ), 670 } 671 672 EXPRESSION_PARSERS = { 673 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 674 exp.Column: lambda self: self._parse_column(), 675 exp.Condition: lambda self: self._parse_assignment(), 676 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 677 exp.Expression: lambda self: self._parse_expression(), 678 exp.From: lambda self: self._parse_from(joins=True), 679 exp.Group: lambda self: self._parse_group(), 680 exp.Having: lambda self: self._parse_having(), 681 exp.Identifier: lambda self: self._parse_id_var(), 682 exp.Join: lambda self: self._parse_join(), 683 exp.Lambda: lambda self: self._parse_lambda(), 684 exp.Lateral: lambda self: self._parse_lateral(), 685 exp.Limit: lambda self: self._parse_limit(), 686 exp.Offset: lambda self: self._parse_offset(), 687 exp.Order: lambda self: self._parse_order(), 688 exp.Ordered: lambda self: self._parse_ordered(), 689 exp.Properties: lambda self: self._parse_properties(), 690 exp.Qualify: lambda 
self: self._parse_qualify(), 691 exp.Returning: lambda self: self._parse_returning(), 692 exp.Select: lambda self: self._parse_select(), 693 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 694 exp.Table: lambda self: self._parse_table_parts(), 695 exp.TableAlias: lambda self: self._parse_table_alias(), 696 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 697 exp.Where: lambda self: self._parse_where(), 698 exp.Window: lambda self: self._parse_named_window(), 699 exp.With: lambda self: self._parse_with(), 700 "JOIN_TYPE": lambda self: self._parse_join_parts(), 701 } 702 703 STATEMENT_PARSERS = { 704 TokenType.ALTER: lambda self: self._parse_alter(), 705 TokenType.BEGIN: lambda self: self._parse_transaction(), 706 TokenType.CACHE: lambda self: self._parse_cache(), 707 TokenType.COMMENT: lambda self: self._parse_comment(), 708 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 709 TokenType.COPY: lambda self: self._parse_copy(), 710 TokenType.CREATE: lambda self: self._parse_create(), 711 TokenType.DELETE: lambda self: self._parse_delete(), 712 TokenType.DESC: lambda self: self._parse_describe(), 713 TokenType.DESCRIBE: lambda self: self._parse_describe(), 714 TokenType.DROP: lambda self: self._parse_drop(), 715 TokenType.INSERT: lambda self: self._parse_insert(), 716 TokenType.KILL: lambda self: self._parse_kill(), 717 TokenType.LOAD: lambda self: self._parse_load(), 718 TokenType.MERGE: lambda self: self._parse_merge(), 719 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 720 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 721 TokenType.REFRESH: lambda self: self._parse_refresh(), 722 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 723 TokenType.SET: lambda self: self._parse_set(), 724 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 725 TokenType.UNCACHE: lambda self: self._parse_uncache(), 726 TokenType.UPDATE: lambda self: 
self._parse_update(), 727 TokenType.USE: lambda self: self.expression( 728 exp.Use, 729 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 730 this=self._parse_table(schema=False), 731 ), 732 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 733 } 734 735 UNARY_PARSERS = { 736 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 737 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 738 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 739 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 740 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 741 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 742 } 743 744 STRING_PARSERS = { 745 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 746 exp.RawString, this=token.text 747 ), 748 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 749 exp.National, this=token.text 750 ), 751 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 752 TokenType.STRING: lambda self, token: self.expression( 753 exp.Literal, this=token.text, is_string=True 754 ), 755 TokenType.UNICODE_STRING: lambda self, token: self.expression( 756 exp.UnicodeString, 757 this=token.text, 758 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 759 ), 760 } 761 762 NUMERIC_PARSERS = { 763 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 764 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 765 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 766 TokenType.NUMBER: lambda self, token: self.expression( 767 exp.Literal, this=token.text, is_string=False 768 ), 769 } 770 771 PRIMARY_PARSERS = { 772 **STRING_PARSERS, 773 
**NUMERIC_PARSERS, 774 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 775 TokenType.NULL: lambda self, _: self.expression(exp.Null), 776 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 777 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 778 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 779 TokenType.STAR: lambda self, _: self.expression( 780 exp.Star, 781 **{ 782 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 783 "replace": self._parse_star_op("REPLACE"), 784 "rename": self._parse_star_op("RENAME"), 785 }, 786 ), 787 } 788 789 PLACEHOLDER_PARSERS = { 790 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 791 TokenType.PARAMETER: lambda self: self._parse_parameter(), 792 TokenType.COLON: lambda self: ( 793 self.expression(exp.Placeholder, this=self._prev.text) 794 if self._match_set(self.ID_VAR_TOKENS) 795 else None 796 ), 797 } 798 799 RANGE_PARSERS = { 800 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 801 TokenType.GLOB: binary_range_parser(exp.Glob), 802 TokenType.ILIKE: binary_range_parser(exp.ILike), 803 TokenType.IN: lambda self, this: self._parse_in(this), 804 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 805 TokenType.IS: lambda self, this: self._parse_is(this), 806 TokenType.LIKE: binary_range_parser(exp.Like), 807 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 808 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 809 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 810 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 811 } 812 813 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 814 "ALLOWED_VALUES": lambda self: self.expression( 815 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 816 ), 817 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 818 "AUTO": lambda self: self._parse_auto_property(), 819 
"AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 820 "BACKUP": lambda self: self.expression( 821 exp.BackupProperty, this=self._parse_var(any_token=True) 822 ), 823 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 824 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 825 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 826 "CHECKSUM": lambda self: self._parse_checksum(), 827 "CLUSTER BY": lambda self: self._parse_cluster(), 828 "CLUSTERED": lambda self: self._parse_clustered_by(), 829 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 830 exp.CollateProperty, **kwargs 831 ), 832 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 833 "CONTAINS": lambda self: self._parse_contains_property(), 834 "COPY": lambda self: self._parse_copy_property(), 835 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 836 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 837 "DEFINER": lambda self: self._parse_definer(), 838 "DETERMINISTIC": lambda self: self.expression( 839 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 840 ), 841 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 842 "DISTKEY": lambda self: self._parse_distkey(), 843 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 844 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 845 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 846 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 847 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 848 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 849 "FREESPACE": lambda self: self._parse_freespace(), 850 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 851 "HEAP": lambda self: 
self.expression(exp.HeapProperty), 852 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 853 "IMMUTABLE": lambda self: self.expression( 854 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 855 ), 856 "INHERITS": lambda self: self.expression( 857 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 858 ), 859 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 860 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 861 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 862 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 863 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 864 "LIKE": lambda self: self._parse_create_like(), 865 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 866 "LOCK": lambda self: self._parse_locking(), 867 "LOCKING": lambda self: self._parse_locking(), 868 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 869 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 870 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 871 "MODIFIES": lambda self: self._parse_modifies_property(), 872 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 873 "NO": lambda self: self._parse_no_property(), 874 "ON": lambda self: self._parse_on_property(), 875 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 876 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 877 "PARTITION": lambda self: self._parse_partitioned_of(), 878 "PARTITION BY": lambda self: self._parse_partitioned_by(), 879 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 880 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 881 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 882 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 883 
"READS": lambda self: self._parse_reads_property(), 884 "REMOTE": lambda self: self._parse_remote_with_connection(), 885 "RETURNS": lambda self: self._parse_returns(), 886 "STRICT": lambda self: self.expression(exp.StrictProperty), 887 "ROW": lambda self: self._parse_row(), 888 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 889 "SAMPLE": lambda self: self.expression( 890 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 891 ), 892 "SECURE": lambda self: self.expression(exp.SecureProperty), 893 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 894 "SETTINGS": lambda self: self.expression( 895 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 896 ), 897 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 898 "SORTKEY": lambda self: self._parse_sortkey(), 899 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 900 "STABLE": lambda self: self.expression( 901 exp.StabilityProperty, this=exp.Literal.string("STABLE") 902 ), 903 "STORED": lambda self: self._parse_stored(), 904 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 905 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 906 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 907 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 908 "TO": lambda self: self._parse_to_table(), 909 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 910 "TRANSFORM": lambda self: self.expression( 911 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 912 ), 913 "TTL": lambda self: self._parse_ttl(), 914 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 915 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 916 "VOLATILE": lambda self: self._parse_volatile_property(), 917 "WITH": lambda self: self._parse_with_property(), 918 } 919 920 
CONSTRAINT_PARSERS = { 921 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 922 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 923 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 924 "CHARACTER SET": lambda self: self.expression( 925 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 926 ), 927 "CHECK": lambda self: self.expression( 928 exp.CheckColumnConstraint, 929 this=self._parse_wrapped(self._parse_assignment), 930 enforced=self._match_text_seq("ENFORCED"), 931 ), 932 "COLLATE": lambda self: self.expression( 933 exp.CollateColumnConstraint, 934 this=self._parse_identifier() or self._parse_column(), 935 ), 936 "COMMENT": lambda self: self.expression( 937 exp.CommentColumnConstraint, this=self._parse_string() 938 ), 939 "COMPRESS": lambda self: self._parse_compress(), 940 "CLUSTERED": lambda self: self.expression( 941 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 942 ), 943 "NONCLUSTERED": lambda self: self.expression( 944 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 945 ), 946 "DEFAULT": lambda self: self.expression( 947 exp.DefaultColumnConstraint, this=self._parse_bitwise() 948 ), 949 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 950 "EPHEMERAL": lambda self: self.expression( 951 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 952 ), 953 "EXCLUDE": lambda self: self.expression( 954 exp.ExcludeColumnConstraint, this=self._parse_index_params() 955 ), 956 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 957 "FORMAT": lambda self: self.expression( 958 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 959 ), 960 "GENERATED": lambda self: self._parse_generated_as_identity(), 961 "IDENTITY": lambda self: self._parse_auto_increment(), 962 "INLINE": lambda self: self._parse_inline(), 963 "LIKE": lambda self: self._parse_create_like(), 
964 "NOT": lambda self: self._parse_not_constraint(), 965 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 966 "ON": lambda self: ( 967 self._match(TokenType.UPDATE) 968 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 969 ) 970 or self.expression(exp.OnProperty, this=self._parse_id_var()), 971 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 972 "PERIOD": lambda self: self._parse_period_for_system_time(), 973 "PRIMARY KEY": lambda self: self._parse_primary_key(), 974 "REFERENCES": lambda self: self._parse_references(match=False), 975 "TITLE": lambda self: self.expression( 976 exp.TitleColumnConstraint, this=self._parse_var_or_string() 977 ), 978 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 979 "UNIQUE": lambda self: self._parse_unique(), 980 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 981 "WITH": lambda self: self.expression( 982 exp.Properties, expressions=self._parse_wrapped_properties() 983 ), 984 } 985 986 ALTER_PARSERS = { 987 "ADD": lambda self: self._parse_alter_table_add(), 988 "ALTER": lambda self: self._parse_alter_table_alter(), 989 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 990 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 991 "DROP": lambda self: self._parse_alter_table_drop(), 992 "RENAME": lambda self: self._parse_alter_table_rename(), 993 "SET": lambda self: self._parse_alter_table_set(), 994 } 995 996 ALTER_ALTER_PARSERS = { 997 "DISTKEY": lambda self: self._parse_alter_diststyle(), 998 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 999 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1000 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1001 } 1002 1003 SCHEMA_UNNAMED_CONSTRAINTS = { 1004 "CHECK", 1005 "EXCLUDE", 1006 "FOREIGN KEY", 1007 "LIKE", 1008 "PERIOD", 1009 "PRIMARY KEY", 
1010 "UNIQUE", 1011 } 1012 1013 NO_PAREN_FUNCTION_PARSERS = { 1014 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1015 "CASE": lambda self: self._parse_case(), 1016 "CONNECT_BY_ROOT": lambda self: self.expression( 1017 exp.ConnectByRoot, this=self._parse_column() 1018 ), 1019 "IF": lambda self: self._parse_if(), 1020 "NEXT": lambda self: self._parse_next_value_for(), 1021 } 1022 1023 INVALID_FUNC_NAME_TOKENS = { 1024 TokenType.IDENTIFIER, 1025 TokenType.STRING, 1026 } 1027 1028 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1029 1030 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1031 1032 FUNCTION_PARSERS = { 1033 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1034 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1035 "DECODE": lambda self: self._parse_decode(), 1036 "EXTRACT": lambda self: self._parse_extract(), 1037 "GAP_FILL": lambda self: self._parse_gap_fill(), 1038 "JSON_OBJECT": lambda self: self._parse_json_object(), 1039 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1040 "JSON_TABLE": lambda self: self._parse_json_table(), 1041 "MATCH": lambda self: self._parse_match_against(), 1042 "OPENJSON": lambda self: self._parse_open_json(), 1043 "POSITION": lambda self: self._parse_position(), 1044 "PREDICT": lambda self: self._parse_predict(), 1045 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1046 "STRING_AGG": lambda self: self._parse_string_agg(), 1047 "SUBSTRING": lambda self: self._parse_substring(), 1048 "TRIM": lambda self: self._parse_trim(), 1049 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1050 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1051 } 1052 1053 QUERY_MODIFIER_PARSERS = { 1054 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1055 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1056 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1057 
TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1058 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1059 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1060 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1061 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1062 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1063 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1064 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1065 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1066 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1067 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1068 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1069 TokenType.CLUSTER_BY: lambda self: ( 1070 "cluster", 1071 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1072 ), 1073 TokenType.DISTRIBUTE_BY: lambda self: ( 1074 "distribute", 1075 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1076 ), 1077 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1078 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1079 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1080 } 1081 1082 SET_PARSERS = { 1083 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1084 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1085 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1086 "TRANSACTION": lambda self: self._parse_set_transaction(), 1087 } 1088 1089 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1090 1091 TYPE_LITERAL_PARSERS = { 1092 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1093 } 1094 1095 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], 
exp.DataType]] = {} 1096 1097 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1098 1099 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1100 1101 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1102 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1103 "ISOLATION": ( 1104 ("LEVEL", "REPEATABLE", "READ"), 1105 ("LEVEL", "READ", "COMMITTED"), 1106 ("LEVEL", "READ", "UNCOMITTED"), 1107 ("LEVEL", "SERIALIZABLE"), 1108 ), 1109 "READ": ("WRITE", "ONLY"), 1110 } 1111 1112 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1113 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1114 ) 1115 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1116 1117 CREATE_SEQUENCE: OPTIONS_TYPE = { 1118 "SCALE": ("EXTEND", "NOEXTEND"), 1119 "SHARD": ("EXTEND", "NOEXTEND"), 1120 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1121 **dict.fromkeys( 1122 ( 1123 "SESSION", 1124 "GLOBAL", 1125 "KEEP", 1126 "NOKEEP", 1127 "ORDER", 1128 "NOORDER", 1129 "NOCACHE", 1130 "CYCLE", 1131 "NOCYCLE", 1132 "NOMINVALUE", 1133 "NOMAXVALUE", 1134 "NOSCALE", 1135 "NOSHARD", 1136 ), 1137 tuple(), 1138 ), 1139 } 1140 1141 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1142 1143 USABLES: OPTIONS_TYPE = dict.fromkeys( 1144 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1145 ) 1146 1147 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1148 1149 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1150 "TYPE": ("EVOLUTION",), 1151 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1152 } 1153 1154 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1155 "NOT": ("ENFORCED",), 1156 "MATCH": ( 1157 "FULL", 1158 "PARTIAL", 1159 "SIMPLE", 1160 ), 1161 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1162 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1163 } 1164 1165 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1166 1167 CLONE_KEYWORDS = {"CLONE", "COPY"} 
1168 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1169 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1170 1171 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1172 1173 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1174 1175 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1176 1177 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1178 1179 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1180 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1181 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1182 1183 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1184 1185 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1186 1187 ADD_CONSTRAINT_TOKENS = { 1188 TokenType.CONSTRAINT, 1189 TokenType.FOREIGN_KEY, 1190 TokenType.INDEX, 1191 TokenType.KEY, 1192 TokenType.PRIMARY_KEY, 1193 TokenType.UNIQUE, 1194 } 1195 1196 DISTINCT_TOKENS = {TokenType.DISTINCT} 1197 1198 NULL_TOKENS = {TokenType.NULL} 1199 1200 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1201 1202 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1203 1204 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1205 1206 STRICT_CAST = True 1207 1208 PREFIXED_PIVOT_COLUMNS = False 1209 IDENTIFY_PIVOT_STRINGS = False 1210 1211 LOG_DEFAULTS_TO_LN = False 1212 1213 # Whether ADD is present for each column added by ALTER TABLE 1214 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1215 1216 # Whether the table sample clause expects CSV syntax 1217 TABLESAMPLE_CSV = False 1218 1219 # The default method used for table sampling 1220 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1221 1222 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1223 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1224 1225 # Whether the TRIM function expects the characters to trim as its first argument 1226 TRIM_PATTERN_FIRST = False 1227 1228 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1229 STRING_ALIASES = False 1230 1231 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1232 MODIFIERS_ATTACHED_TO_SET_OP = True 1233 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1234 1235 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1236 NO_PAREN_IF_COMMANDS = True 1237 1238 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1239 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1240 1241 # Whether the `:` operator is used to extract a value from a VARIANT column 1242 COLON_IS_VARIANT_EXTRACT = False 1243 1244 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1245 # If this is True and '(' is not found, the keyword will be treated as an identifier 1246 VALUES_FOLLOWED_BY_PAREN = True 1247 1248 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1249 SUPPORTS_IMPLICIT_UNNEST = False 1250 1251 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1252 INTERVAL_SPANS = True 1253 1254 # Whether a PARTITION clause can follow a table reference 1255 SUPPORTS_PARTITION_SELECTION = False 1256 1257 __slots__ = ( 1258 "error_level", 1259 "error_message_context", 1260 "max_errors", 1261 "dialect", 1262 "sql", 1263 "errors", 1264 "_tokens", 1265 "_index", 1266 "_curr", 1267 "_next", 1268 "_prev", 1269 "_prev_comments", 1270 ) 1271 1272 # Autofilled 1273 SHOW_TRIE: t.Dict = {} 1274 SET_TRIE: t.Dict = {} 1275 1276 def __init__( 1277 self, 1278 error_level: t.Optional[ErrorLevel] = None, 1279 error_message_context: int = 100, 1280 max_errors: int = 3, 1281 dialect: DialectType = None, 1282 ): 1283 from sqlglot.dialects import Dialect 1284 1285 self.error_level = error_level or ErrorLevel.IMMEDIATE 1286 self.error_message_context = error_message_context 1287 self.max_errors = max_errors 1288 self.dialect = Dialect.get_or_raise(dialect) 1289 self.reset() 1290 1291 def reset(self): 1292 self.sql = "" 1293 self.errors = [] 1294 self._tokens = [] 1295 self._index = 0 1296 self._curr = None 1297 self._next = None 1298 self._prev = None 1299 self._prev_comments = None 1300 1301 def parse( 1302 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1303 ) -> t.List[t.Optional[exp.Expression]]: 1304 """ 1305 Parses a list of tokens and returns a list of syntax trees, one tree 1306 per parsed SQL statement. 1307 1308 Args: 1309 raw_tokens: The list of tokens. 1310 sql: The original SQL string, used to produce helpful debug messages. 1311 1312 Returns: 1313 The list of the produced syntax trees. 
1314 """ 1315 return self._parse( 1316 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1317 ) 1318 1319 def parse_into( 1320 self, 1321 expression_types: exp.IntoType, 1322 raw_tokens: t.List[Token], 1323 sql: t.Optional[str] = None, 1324 ) -> t.List[t.Optional[exp.Expression]]: 1325 """ 1326 Parses a list of tokens into a given Expression type. If a collection of Expression 1327 types is given instead, this method will try to parse the token list into each one 1328 of them, stopping at the first for which the parsing succeeds. 1329 1330 Args: 1331 expression_types: The expression type(s) to try and parse the token list into. 1332 raw_tokens: The list of tokens. 1333 sql: The original SQL string, used to produce helpful debug messages. 1334 1335 Returns: 1336 The target Expression. 1337 """ 1338 errors = [] 1339 for expression_type in ensure_list(expression_types): 1340 parser = self.EXPRESSION_PARSERS.get(expression_type) 1341 if not parser: 1342 raise TypeError(f"No parser registered for {expression_type}") 1343 1344 try: 1345 return self._parse(parser, raw_tokens, sql) 1346 except ParseError as e: 1347 e.errors[0]["into_expression"] = expression_type 1348 errors.append(e) 1349 1350 raise ParseError( 1351 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1352 errors=merge_errors(errors), 1353 ) from errors[-1] 1354 1355 def _parse( 1356 self, 1357 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1358 raw_tokens: t.List[Token], 1359 sql: t.Optional[str] = None, 1360 ) -> t.List[t.Optional[exp.Expression]]: 1361 self.reset() 1362 self.sql = sql or "" 1363 1364 total = len(raw_tokens) 1365 chunks: t.List[t.List[Token]] = [[]] 1366 1367 for i, token in enumerate(raw_tokens): 1368 if token.token_type == TokenType.SEMICOLON: 1369 if token.comments: 1370 chunks.append([token]) 1371 1372 if i < total - 1: 1373 chunks.append([]) 1374 else: 1375 chunks[-1].append(token) 1376 1377 expressions = [] 1378 1379 for 
tokens in chunks: 1380 self._index = -1 1381 self._tokens = tokens 1382 self._advance() 1383 1384 expressions.append(parse_method(self)) 1385 1386 if self._index < len(self._tokens): 1387 self.raise_error("Invalid expression / Unexpected token") 1388 1389 self.check_errors() 1390 1391 return expressions 1392 1393 def check_errors(self) -> None: 1394 """Logs or raises any found errors, depending on the chosen error level setting.""" 1395 if self.error_level == ErrorLevel.WARN: 1396 for error in self.errors: 1397 logger.error(str(error)) 1398 elif self.error_level == ErrorLevel.RAISE and self.errors: 1399 raise ParseError( 1400 concat_messages(self.errors, self.max_errors), 1401 errors=merge_errors(self.errors), 1402 ) 1403 1404 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1405 """ 1406 Appends an error in the list of recorded errors or raises it, depending on the chosen 1407 error level setting. 1408 """ 1409 token = token or self._curr or self._prev or Token.string("") 1410 start = token.start 1411 end = token.end + 1 1412 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1413 highlight = self.sql[start:end] 1414 end_context = self.sql[end : end + self.error_message_context] 1415 1416 error = ParseError.new( 1417 f"{message}. Line {token.line}, Col: {token.col}.\n" 1418 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1419 description=message, 1420 line=token.line, 1421 col=token.col, 1422 start_context=start_context, 1423 highlight=highlight, 1424 end_context=end_context, 1425 ) 1426 1427 if self.error_level == ErrorLevel.IMMEDIATE: 1428 raise error 1429 1430 self.errors.append(error) 1431 1432 def expression( 1433 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1434 ) -> E: 1435 """ 1436 Creates a new, validated Expression. 1437 1438 Args: 1439 exp_class: The expression class to instantiate. 1440 comments: An optional list of comments to attach to the expression. 
1441 kwargs: The arguments to set for the expression along with their respective values. 1442 1443 Returns: 1444 The target expression. 1445 """ 1446 instance = exp_class(**kwargs) 1447 instance.add_comments(comments) if comments else self._add_comments(instance) 1448 return self.validate_expression(instance) 1449 1450 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1451 if expression and self._prev_comments: 1452 expression.add_comments(self._prev_comments) 1453 self._prev_comments = None 1454 1455 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1456 """ 1457 Validates an Expression, making sure that all its mandatory arguments are set. 1458 1459 Args: 1460 expression: The expression to validate. 1461 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1462 1463 Returns: 1464 The validated expression. 1465 """ 1466 if self.error_level != ErrorLevel.IGNORE: 1467 for error_message in expression.error_messages(args): 1468 self.raise_error(error_message) 1469 1470 return expression 1471 1472 def _find_sql(self, start: Token, end: Token) -> str: 1473 return self.sql[start.start : end.end + 1] 1474 1475 def _is_connected(self) -> bool: 1476 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1477 1478 def _advance(self, times: int = 1) -> None: 1479 self._index += times 1480 self._curr = seq_get(self._tokens, self._index) 1481 self._next = seq_get(self._tokens, self._index + 1) 1482 1483 if self._index > 0: 1484 self._prev = self._tokens[self._index - 1] 1485 self._prev_comments = self._prev.comments 1486 else: 1487 self._prev = None 1488 self._prev_comments = None 1489 1490 def _retreat(self, index: int) -> None: 1491 if index != self._index: 1492 self._advance(index - self._index) 1493 1494 def _warn_unsupported(self) -> None: 1495 if len(self._tokens) <= 1: 1496 return 1497 1498 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1499 # interested in emitting a warning for the one being currently processed. 1500 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1501 1502 logger.warning( 1503 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1504 ) 1505 1506 def _parse_command(self) -> exp.Command: 1507 self._warn_unsupported() 1508 return self.expression( 1509 exp.Command, 1510 comments=self._prev_comments, 1511 this=self._prev.text.upper(), 1512 expression=self._parse_string(), 1513 ) 1514 1515 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1516 """ 1517 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1518 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1519 solve this by setting & resetting the parser state accordingly 1520 """ 1521 index = self._index 1522 error_level = self.error_level 1523 1524 self.error_level = ErrorLevel.IMMEDIATE 1525 try: 1526 this = parse_method() 1527 except ParseError: 1528 this = None 1529 finally: 1530 if not this or retreat: 1531 self._retreat(index) 1532 self.error_level = error_level 1533 1534 return this 1535 1536 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1537 start = self._prev 1538 exists = self._parse_exists() if allow_exists else None 1539 1540 self._match(TokenType.ON) 1541 1542 materialized = self._match_text_seq("MATERIALIZED") 1543 kind = self._match_set(self.CREATABLES) and self._prev 1544 if not kind: 1545 return self._parse_as_command(start) 1546 1547 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1548 this = self._parse_user_defined_function(kind=kind.token_type) 1549 elif kind.token_type == TokenType.TABLE: 1550 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1551 elif kind.token_type == TokenType.COLUMN: 1552 this = self._parse_column() 1553 else: 1554 this 
= self._parse_id_var() 1555 1556 self._match(TokenType.IS) 1557 1558 return self.expression( 1559 exp.Comment, 1560 this=this, 1561 kind=kind.text, 1562 expression=self._parse_string(), 1563 exists=exists, 1564 materialized=materialized, 1565 ) 1566 1567 def _parse_to_table( 1568 self, 1569 ) -> exp.ToTableProperty: 1570 table = self._parse_table_parts(schema=True) 1571 return self.expression(exp.ToTableProperty, this=table) 1572 1573 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1574 def _parse_ttl(self) -> exp.Expression: 1575 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1576 this = self._parse_bitwise() 1577 1578 if self._match_text_seq("DELETE"): 1579 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1580 if self._match_text_seq("RECOMPRESS"): 1581 return self.expression( 1582 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1583 ) 1584 if self._match_text_seq("TO", "DISK"): 1585 return self.expression( 1586 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1587 ) 1588 if self._match_text_seq("TO", "VOLUME"): 1589 return self.expression( 1590 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1591 ) 1592 1593 return this 1594 1595 expressions = self._parse_csv(_parse_ttl_action) 1596 where = self._parse_where() 1597 group = self._parse_group() 1598 1599 aggregates = None 1600 if group and self._match(TokenType.SET): 1601 aggregates = self._parse_csv(self._parse_set_item) 1602 1603 return self.expression( 1604 exp.MergeTreeTTL, 1605 expressions=expressions, 1606 where=where, 1607 group=group, 1608 aggregates=aggregates, 1609 ) 1610 1611 def _parse_statement(self) -> t.Optional[exp.Expression]: 1612 if self._curr is None: 1613 return None 1614 1615 if self._match_set(self.STATEMENT_PARSERS): 1616 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1617 1618 if self._match_set(self.dialect.tokenizer.COMMANDS): 1619 return 
self._parse_command() 1620 1621 expression = self._parse_expression() 1622 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1623 return self._parse_query_modifiers(expression) 1624 1625 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1626 start = self._prev 1627 temporary = self._match(TokenType.TEMPORARY) 1628 materialized = self._match_text_seq("MATERIALIZED") 1629 1630 kind = self._match_set(self.CREATABLES) and self._prev.text 1631 if not kind: 1632 return self._parse_as_command(start) 1633 1634 if_exists = exists or self._parse_exists() 1635 table = self._parse_table_parts( 1636 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1637 ) 1638 1639 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1640 1641 if self._match(TokenType.L_PAREN, advance=False): 1642 expressions = self._parse_wrapped_csv(self._parse_types) 1643 else: 1644 expressions = None 1645 1646 return self.expression( 1647 exp.Drop, 1648 comments=start.comments, 1649 exists=if_exists, 1650 this=table, 1651 expressions=expressions, 1652 kind=kind.upper(), 1653 temporary=temporary, 1654 materialized=materialized, 1655 cascade=self._match_text_seq("CASCADE"), 1656 constraints=self._match_text_seq("CONSTRAINTS"), 1657 purge=self._match_text_seq("PURGE"), 1658 cluster=cluster, 1659 ) 1660 1661 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1662 return ( 1663 self._match_text_seq("IF") 1664 and (not not_ or self._match(TokenType.NOT)) 1665 and self._match(TokenType.EXISTS) 1666 ) 1667 1668 def _parse_create(self) -> exp.Create | exp.Command: 1669 # Note: this can't be None because we've matched a statement parser 1670 start = self._prev 1671 comments = self._prev_comments 1672 1673 replace = ( 1674 start.token_type == TokenType.REPLACE 1675 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1676 or self._match_pair(TokenType.OR, TokenType.ALTER) 1677 ) 1678 1679 unique = 
self._match(TokenType.UNIQUE) 1680 1681 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1682 clustered = True 1683 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1684 "COLUMNSTORE" 1685 ): 1686 clustered = False 1687 else: 1688 clustered = None 1689 1690 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1691 self._advance() 1692 1693 properties = None 1694 create_token = self._match_set(self.CREATABLES) and self._prev 1695 1696 if not create_token: 1697 # exp.Properties.Location.POST_CREATE 1698 properties = self._parse_properties() 1699 create_token = self._match_set(self.CREATABLES) and self._prev 1700 1701 if not properties or not create_token: 1702 return self._parse_as_command(start) 1703 1704 concurrently = self._match_text_seq("CONCURRENTLY") 1705 exists = self._parse_exists(not_=True) 1706 this = None 1707 expression: t.Optional[exp.Expression] = None 1708 indexes = None 1709 no_schema_binding = None 1710 begin = None 1711 end = None 1712 clone = None 1713 1714 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1715 nonlocal properties 1716 if properties and temp_props: 1717 properties.expressions.extend(temp_props.expressions) 1718 elif temp_props: 1719 properties = temp_props 1720 1721 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1722 this = self._parse_user_defined_function(kind=create_token.token_type) 1723 1724 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1725 extend_props(self._parse_properties()) 1726 1727 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1728 extend_props(self._parse_properties()) 1729 1730 if not expression: 1731 if self._match(TokenType.COMMAND): 1732 expression = self._parse_as_command(self._prev) 1733 else: 1734 begin = self._match(TokenType.BEGIN) 1735 return_ = self._match_text_seq("RETURN") 1736 1737 if self._match(TokenType.STRING, advance=False): 1738 # Takes care 
of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1739 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1740 expression = self._parse_string() 1741 extend_props(self._parse_properties()) 1742 else: 1743 expression = self._parse_statement() 1744 1745 end = self._match_text_seq("END") 1746 1747 if return_: 1748 expression = self.expression(exp.Return, this=expression) 1749 elif create_token.token_type == TokenType.INDEX: 1750 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1751 if not self._match(TokenType.ON): 1752 index = self._parse_id_var() 1753 anonymous = False 1754 else: 1755 index = None 1756 anonymous = True 1757 1758 this = self._parse_index(index=index, anonymous=anonymous) 1759 elif create_token.token_type in self.DB_CREATABLES: 1760 table_parts = self._parse_table_parts( 1761 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1762 ) 1763 1764 # exp.Properties.Location.POST_NAME 1765 self._match(TokenType.COMMA) 1766 extend_props(self._parse_properties(before=True)) 1767 1768 this = self._parse_schema(this=table_parts) 1769 1770 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1771 extend_props(self._parse_properties()) 1772 1773 self._match(TokenType.ALIAS) 1774 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1775 # exp.Properties.Location.POST_ALIAS 1776 extend_props(self._parse_properties()) 1777 1778 if create_token.token_type == TokenType.SEQUENCE: 1779 expression = self._parse_types() 1780 extend_props(self._parse_properties()) 1781 else: 1782 expression = self._parse_ddl_select() 1783 1784 if create_token.token_type == TokenType.TABLE: 1785 # exp.Properties.Location.POST_EXPRESSION 1786 extend_props(self._parse_properties()) 1787 1788 indexes = [] 1789 while True: 1790 index = self._parse_index() 1791 1792 # exp.Properties.Location.POST_INDEX 1793 extend_props(self._parse_properties()) 1794 
1795 if not index: 1796 break 1797 else: 1798 self._match(TokenType.COMMA) 1799 indexes.append(index) 1800 elif create_token.token_type == TokenType.VIEW: 1801 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1802 no_schema_binding = True 1803 1804 shallow = self._match_text_seq("SHALLOW") 1805 1806 if self._match_texts(self.CLONE_KEYWORDS): 1807 copy = self._prev.text.lower() == "copy" 1808 clone = self.expression( 1809 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1810 ) 1811 1812 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1813 return self._parse_as_command(start) 1814 1815 return self.expression( 1816 exp.Create, 1817 comments=comments, 1818 this=this, 1819 kind=create_token.text.upper(), 1820 replace=replace, 1821 unique=unique, 1822 expression=expression, 1823 exists=exists, 1824 properties=properties, 1825 indexes=indexes, 1826 no_schema_binding=no_schema_binding, 1827 begin=begin, 1828 end=end, 1829 clone=clone, 1830 concurrently=concurrently, 1831 clustered=clustered, 1832 ) 1833 1834 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1835 seq = exp.SequenceProperties() 1836 1837 options = [] 1838 index = self._index 1839 1840 while self._curr: 1841 self._match(TokenType.COMMA) 1842 if self._match_text_seq("INCREMENT"): 1843 self._match_text_seq("BY") 1844 self._match_text_seq("=") 1845 seq.set("increment", self._parse_term()) 1846 elif self._match_text_seq("MINVALUE"): 1847 seq.set("minvalue", self._parse_term()) 1848 elif self._match_text_seq("MAXVALUE"): 1849 seq.set("maxvalue", self._parse_term()) 1850 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1851 self._match_text_seq("=") 1852 seq.set("start", self._parse_term()) 1853 elif self._match_text_seq("CACHE"): 1854 # T-SQL allows empty CACHE which is initialized dynamically 1855 seq.set("cache", self._parse_number() or True) 1856 elif self._match_text_seq("OWNED", 
"BY"): 1857 # "OWNED BY NONE" is the default 1858 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1859 else: 1860 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1861 if opt: 1862 options.append(opt) 1863 else: 1864 break 1865 1866 seq.set("options", options if options else None) 1867 return None if self._index == index else seq 1868 1869 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1870 # only used for teradata currently 1871 self._match(TokenType.COMMA) 1872 1873 kwargs = { 1874 "no": self._match_text_seq("NO"), 1875 "dual": self._match_text_seq("DUAL"), 1876 "before": self._match_text_seq("BEFORE"), 1877 "default": self._match_text_seq("DEFAULT"), 1878 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1879 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1880 "after": self._match_text_seq("AFTER"), 1881 "minimum": self._match_texts(("MIN", "MINIMUM")), 1882 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1883 } 1884 1885 if self._match_texts(self.PROPERTY_PARSERS): 1886 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1887 try: 1888 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1889 except TypeError: 1890 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1891 1892 return None 1893 1894 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1895 return self._parse_wrapped_csv(self._parse_property) 1896 1897 def _parse_property(self) -> t.Optional[exp.Expression]: 1898 if self._match_texts(self.PROPERTY_PARSERS): 1899 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1900 1901 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1902 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1903 1904 if self._match_text_seq("COMPOUND", "SORTKEY"): 1905 return self._parse_sortkey(compound=True) 1906 1907 if self._match_text_seq("SQL", "SECURITY"): 1908 return 
                self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse a STORED AS clause, including Hive INPUTFORMAT/OUTPUTFORMAT."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        # Unquoted identifiers are normalized into plain Vars.
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse `<keyword> [=|AS] <value>` into the given expression class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collect consecutive properties into an exp.Properties node, or None."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Look two tokens back to disambiguate VOLATILE-as-table-property from
        # VOLATILE-as-function-stability.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL SYSTEM_VERSIONING = ON|OFF (optionally with options)."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency",
                             self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        # Absent ON/OFF defaults to on.
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Dispatch the many WITH <...> property forms to their parsers."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        # Tri-state: True (ON), False (OFF), or None (unspecified).
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse Hive-style CLUSTERED BY (...) [SORTED BY (...)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None
        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # Give back the COPY keyword we consumed before dispatching here.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            # Not an isolated-loading clause after all; restore position.
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a Teradata-style LOCKING clause (kind, FOR/IN, lock type, OVERRIDE)."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a PostgreSQL partition bound: IN (...), FROM ... TO ..., or WITH (MODULUS, REMAINDER)."""
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = \
                self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # Tri-state statistics flag: AND STATISTICS / AND NO STATISTICS / absent.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse CREATE ... LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: TABLE<...>, TABLE schema, NULL ON NULL INPUT, or a type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # What looked like a style keyword was actually a table-name part.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (including INSERT OVERWRITE DIRECTORY)."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (Postgres-style) or ON DUPLICATE KEY (MySQL-style)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT clause (SERDE or DELIMITED)."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        # Anything other than LOAD DATA is kept as an opaque command.
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where":
                         self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse a CACHE [LAZY] TABLE statement with optional OPTIONS."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse a single row of a VALUES clause into a Tuple."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement (or parenthesized/VALUES/leading-FROM variant)."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = \
self._parse_table_alias(self.ID_VAR_TOKENS) 2857 if not alias or not alias.this: 2858 self.raise_error("Expected CTE to have alias") 2859 2860 self._match(TokenType.ALIAS) 2861 comments = self._prev_comments 2862 2863 if self._match_text_seq("NOT", "MATERIALIZED"): 2864 materialized = False 2865 elif self._match_text_seq("MATERIALIZED"): 2866 materialized = True 2867 else: 2868 materialized = None 2869 2870 return self.expression( 2871 exp.CTE, 2872 this=self._parse_wrapped(self._parse_statement), 2873 alias=alias, 2874 materialized=materialized, 2875 comments=comments, 2876 ) 2877 2878 def _parse_table_alias( 2879 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2880 ) -> t.Optional[exp.TableAlias]: 2881 any_token = self._match(TokenType.ALIAS) 2882 alias = ( 2883 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2884 or self._parse_string_as_identifier() 2885 ) 2886 2887 index = self._index 2888 if self._match(TokenType.L_PAREN): 2889 columns = self._parse_csv(self._parse_function_parameter) 2890 self._match_r_paren() if columns else self._retreat(index) 2891 else: 2892 columns = None 2893 2894 if not alias and not columns: 2895 return None 2896 2897 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2898 2899 # We bubble up comments from the Identifier to the TableAlias 2900 if isinstance(alias, exp.Identifier): 2901 table_alias.add_comments(alias.pop_comments()) 2902 2903 return table_alias 2904 2905 def _parse_subquery( 2906 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2907 ) -> t.Optional[exp.Subquery]: 2908 if not this: 2909 return None 2910 2911 return self.expression( 2912 exp.Subquery, 2913 this=this, 2914 pivots=self._parse_pivots(), 2915 alias=self._parse_table_alias() if parse_alias else None, 2916 ) 2917 2918 def _implicit_unnests_to_explicit(self, this: E) -> E: 2919 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2920 
    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite implicit unnests (bare references to columns of earlier FROM/JOIN
        sources used as join tables) into explicit exp.Unnest nodes.

        Only cross-join-like entries (no ON clause) whose first name part refers to a
        previously seen source are rewritten.
        """
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        # Names of sources seen so far, starting with the FROM source.
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, WHERE/GROUP/LIMIT etc.)
        to a query or table expression.

        Non-query/table nodes are returned unchanged.
        """
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # `LIMIT x, y` style: hoist the embedded offset into a
                            # proper exp.Offset node and move any LIMIT BY
                            # expressions onto it.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

            if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
                this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint block (`/*+ ... */` style), or return None."""
        if self._match(TokenType.HINT):
            hints = []
            # Keep collecting comma-separated hint lists until an empty batch.
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse `INTO [TEMPORARY|UNLOGGED] [TABLE] <table>`, or return None."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause into exp.From.

        Args:
            joins: also parse trailing joins into the table expression.
            skip_from_token: assume the FROM keyword was already consumed.
        """
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES entry, with an optional FINAL/RUNNING frame prefix."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause, or return None if absent."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # ROWS PER MATCH variants are stored as plain variables with the
        # literal keyword text.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured as raw SQL text between balanced parens.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse a LATERAL / CROSS APPLY / OUTER APPLY construct, or return None.

        `cross_apply` is True for CROSS APPLY, False for OUTER APPLY, and None
        for plain LATERAL.
        """
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to UNNEST / function call / identifier,
            # optionally dotted (e.g. schema.func(...)).
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume the optional (method, side, kind) tokens preceding JOIN."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single join (comma join, JOIN ..., or CROSS/OUTER APPLY).

        Args:
            skip_join_token: treat the join keyword as already consumed.
            parse_bracket: allow a bracketed table expression as the join target.

        Returns:
            The parsed exp.Join, or None if no join follows.
        """
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword after the modifiers: rewind and discard them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            # The ON/USING may belong to a nested chain of joins; try parsing
            # them and attach the condition here, otherwise rewind.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a Postgres operator class."""
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the parameter tail of an index definition (USING, columns,
        INCLUDE, PARTITION BY, WITH storage options, TABLESPACE, WHERE, ON)."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition into exp.Index.

        Args:
            index: a pre-parsed index name; when given, only the table and
                params are parsed here.
            anonymous: parse an unnamed index (no INDEX keyword expected).
        """
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints, or None."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function, identifier,
        string-as-identifier, or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly qualified) table name `[catalog.][db.]table` into exp.Table.

        Args:
            schema: parsing inside a schema context (disables function parsing).
            is_db_reference: the trailing part names a database, not a table.
            wildcard: allow a trailing `*` fused onto the last identifier.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, UNNEST, VALUES, subquery, or a named
        table with its optional version, alias, hints, pivots and sample.

        Args:
            schema: wrap the result in a schema (column-definition) context.
            joins: also parse trailing joins onto the result.
            alias_tokens: token types allowed as the alias identifier.
            parse_bracket: allow a bracketed table expression.
            is_db_reference: the name refers to a database rather than a table.
            parse_partition: allow a PARTITION selection clause.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place TABLESAMPLE before the alias, others after;
        # parse it at whichever position the dialect prescribes.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal-table version clause (FOR TIMESTAMP/VERSION ...),
        or return None if no snapshot token follows."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        """Parse a Snowflake AT/BEFORE historical-data clause, or return None.

        Rewinds the parser if the clause turns out not to match.
        """
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data
    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse a Snowflake CHANGES (INFORMATION => ...) clause, or return None."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) table expression, or return None.

        Handles both `WITH ORDINALITY` and `WITH OFFSET [AS x]` forms, and the
        dialect quirk where the alias names columns rather than the table.
        """
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # In these dialects the alias names the produced column, not
                # the table, so move it into the columns slot.
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The extra trailing column alias names the ordinality column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES (...) [, (...)] table, optionally wrapped in parens,
        or return None if no VALUES follows."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
self.expression( 3617 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3618 ) 3619 3620 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3621 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3622 as_modifier and self._match_text_seq("USING", "SAMPLE") 3623 ): 3624 return None 3625 3626 bucket_numerator = None 3627 bucket_denominator = None 3628 bucket_field = None 3629 percent = None 3630 size = None 3631 seed = None 3632 3633 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3634 matched_l_paren = self._match(TokenType.L_PAREN) 3635 3636 if self.TABLESAMPLE_CSV: 3637 num = None 3638 expressions = self._parse_csv(self._parse_primary) 3639 else: 3640 expressions = None 3641 num = ( 3642 self._parse_factor() 3643 if self._match(TokenType.NUMBER, advance=False) 3644 else self._parse_primary() or self._parse_placeholder() 3645 ) 3646 3647 if self._match_text_seq("BUCKET"): 3648 bucket_numerator = self._parse_number() 3649 self._match_text_seq("OUT", "OF") 3650 bucket_denominator = bucket_denominator = self._parse_number() 3651 self._match(TokenType.ON) 3652 bucket_field = self._parse_field() 3653 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3654 percent = num 3655 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3656 size = num 3657 else: 3658 percent = num 3659 3660 if matched_l_paren: 3661 self._match_r_paren() 3662 3663 if self._match(TokenType.L_PAREN): 3664 method = self._parse_var(upper=True) 3665 seed = self._match(TokenType.COMMA) and self._parse_number() 3666 self._match_r_paren() 3667 elif self._match_texts(("SEED", "REPEATABLE")): 3668 seed = self._parse_wrapped(self._parse_number) 3669 3670 if not method and self.DEFAULT_SAMPLING_METHOD: 3671 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3672 3673 return self.expression( 3674 exp.TableSample, 3675 expressions=expressions, 3676 method=method, 3677 
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse zero or more consecutive PIVOT/UNPIVOT clauses; None if none."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        """Yield joins until _parse_join returns None."""
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement form."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the `FOR col IN (value [AS alias], ...)` part of a PIVOT."""
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            # One IN-list entry, optionally aliased via AS.
            this = self._parse_assignment()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a full PIVOT/UNPIVOT clause, or return None (rewinding on a
        near-miss without a following paren)."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Derive the output column names from the aggregation aliases and
            # the IN-list values, honoring the dialect's naming conventions.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each aggregation, used to name pivot columns."""
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a ClickHouse-style PREWHERE clause, or return None."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )
    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, or return None if absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including ALL/DISTINCT, GROUPING SETS,
        ROLLUP, CUBE and WITH TOTALS variants."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(
                lambda: None
                if self._match(TokenType.ROLLUP, advance=False)
                else self._parse_assignment()
            )
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            # WITH ROLLUP / WITH CUBE store True; bare ROLLUP(...)/CUBE(...)
            # store the wrapped column lists.
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # The WITH belonged to something else; rewind before it.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `GROUPING SETS (...)`, or return None if absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or one column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, or return None if absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, or return None if absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY (in either order).

        While parsing the CONNECT BY condition, PRIOR is temporarily registered
        as a no-paren function parser so `PRIOR expr` is recognized.
        """
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `name [AS expression]` into an exp.Alias (expression may be absent)."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an INTERPOLATE (...) list (ClickHouse ORDER BY extension), or None."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None
    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY (or ORDER SIBLINGS BY) clause onto `this`.

        Returns `this` unchanged if no ordering clause follows.
        """
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (e.g. SORT BY / CLUSTER BY) into exp_class."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ordering term: expression, ASC/DESC, NULLS FIRST/LAST and
        an optional ClickHouse WITH FILL suffix.

        When nulls ordering is not explicit, `nulls_first` is derived from the
        dialect's NULL_ORDERING setting and the sort direction.
        """
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT / TOP clause, or an ANSI FETCH FIRST/NEXT clause.

        Args:
            this: the expression the clause is attached to.
            top: parse T-SQL TOP instead of LIMIT.
            skip_limit_token: assume the LIMIT/TOP keyword was already consumed.

        Returns:
            An exp.Limit or exp.Fetch, or `this` unchanged if neither follows.
        """
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP may parenthesize its expression: TOP (n)
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL `LIMIT offset, count` form
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET <count> [ROW|ROWS] attached to `this`; returns `this` if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse-style LIMIT ... BY <exprs>; falsy when BY is absent."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse zero or more row-locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE),
        each optionally with OF <tables> and NOWAIT / WAIT <n> / SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>, None = unspecified
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT branches onto `this`, left-associatively.

        When the dialect attaches trailing modifiers (ORDER BY/LIMIT/...) to the whole set
        operation, they are hoisted from the right-hand operand onto the SetOperation node.
        """
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is the default unless ALL was given explicitly
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Move trailing modifiers from the last operand up to the set operation
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse an expression with an optional trailing alias."""
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse assignment-level operators (right-associative via recursion)."""
        this = self._parse_disjunction()

        while self._match_set(self.ASSIGNMENT):
            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    # Precedence-climbing chain: each level delegates to the next tighter-binding one.
    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-level predicates: [NOT] BETWEEN/IN/LIKE/..., ISNULL/NOTNULL, and IS."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate (IS was already consumed by the caller).

        Handles IS [NOT] DISTINCT FROM and IS [NOT] NULL/TRUE/FALSE; retreats and
        returns None when what follows IS is neither, so other parsers can try.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of an IN predicate: IN UNNEST(...), IN (subquery|exprs), or IN field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A lone query inside the parens is an IN (SELECT ...) subquery
            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse the tail of a BETWEEN predicate: <low> AND <high>."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an Escape node when an ESCAPE '<char>' clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression, canonicalizing it to INTERVAL '<value>' <unit>.

        `match_interval=False` is used when continuing a sum of interval literals and the
        INTERVAL keyword is not repeated. Returns an Add for chained interval sums.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # Bail out on things like `interval IS NULL`, where `interval` is a column name
        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        # e.g. INTERVAL '1' DAY TO SECOND
        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level operators, plus ||, ?? and <</>> shift spellings."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # `a ?? b` is a coalesce spelling
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level (TERM) operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level (FACTOR) operators, tagging Div with dialect semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word-spelled int-div operator (e.g. DIV) with no RHS was actually an
            # identifier/alias, not an operator -- back off and return what we have.
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level operators (only when the dialect defines EXPONENT)."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, else fall through to type/AT TIME ZONE parsing."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse either a typed literal (e.g. DATE '2020-01-01' -> Cast), a data type,
        an interval, or fall back to a column/identifier."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one data type parameter (e.g. the 38 in DECIMAL(38, 0)), normalizing bare
        column names to uppercase vars."""
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY/STRUCT/MAP), parameterized,
        timezone-qualified, interval, enum, and user-defined types.

        Args:
            check_func: require a following string literal before accepting an ambiguous
                type-like function call (e.g. DATE(...)) as a type.
            schema: parsing inside a schema definition, which permits ARRAY[<size>] values.
            allow_identifiers: allow resolving a plain identifier to a (possibly UDT) type.

        Returns None (after retreating) when no type can be parsed at the current position.
        """
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(<func>, <arg types...>)
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if type_token == TokenType.VECTOR and len(expressions) == 2:
                expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Inline constructor values, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        # Disambiguate a parenthesized type from a same-named function call: only accept
        # it as a type when a string literal follows (e.g. the typed-literal syntax).
        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if values and not schema:
                # Outside a schema, T[values] is bracket indexing, not an array type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: `name[:] type [constraints]`, or a bare type."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone when an AT TIME ZONE clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted/bracketed) column reference with trailing column ops."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        # Oracle outer-join marker, e.g. col(+)
        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse the base of a column reference, wrapping a bare Identifier in a Column."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES used as a plain identifier (not the VALUES (...) construct)
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `expr:path.to.field::type` VARIANT extraction into a
        JSONExtract (with any trailing casts re-applied outside the extraction)."""
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the type operand of a `::` cast."""
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (dots, ::, brackets, etc.) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift name parts left: what was parsed as a column becomes table/db/catalog
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, implicit string concat, `.N` numbers, or a
        parenthesized expression/tuple/subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate (e.g. 'a' 'b' -> Concat)
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: primary, function call, or identifier. `anonymous_func` flips
        the primary/function precedence so `any(...)`-style names parse as functions."""
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, unwrapping the ODBC `{fn <function>}` escape if present."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Core function-call parser.

        Args:
            functions: name -> builder overrides (defaults to self.FUNCTIONS).
            anonymous: skip known-function builders and produce exp.Anonymous.
            optional_parens: allow no-paren functions (e.g. CURRENT_DATE).
            any_token: permit function names that are not in FUNC_TOKENS (but never
                reserved tokens).
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Builders that accept a `dialect` kwarg get the active dialect
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling so it can be emitted verbatim
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases/equalities) into PropertyEQ nodes."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap Column keys down to their underlying identifier
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter as a column definition (name + optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly-qualified UDF name and, if present, its parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5005 ) 5006 5007 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5008 literal = self._parse_primary() 5009 if literal: 5010 return self.expression(exp.Introducer, this=token.text, expression=literal) 5011 5012 return self.expression(exp.Identifier, this=token.text) 5013 5014 def _parse_session_parameter(self) -> exp.SessionParameter: 5015 kind = None 5016 this = self._parse_id_var() or self._parse_primary() 5017 5018 if this and self._match(TokenType.DOT): 5019 kind = this.name 5020 this = self._parse_var() or self._parse_primary() 5021 5022 return self.expression(exp.SessionParameter, this=this, kind=kind) 5023 5024 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5025 return self._parse_id_var() 5026 5027 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5028 index = self._index 5029 5030 if self._match(TokenType.L_PAREN): 5031 expressions = t.cast( 5032 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5033 ) 5034 5035 if not self._match(TokenType.R_PAREN): 5036 self._retreat(index) 5037 else: 5038 expressions = [self._parse_lambda_arg()] 5039 5040 if self._match_set(self.LAMBDAS): 5041 return self.LAMBDAS[self._prev.token_type](self, expressions) 5042 5043 self._retreat(index) 5044 5045 this: t.Optional[exp.Expression] 5046 5047 if self._match(TokenType.DISTINCT): 5048 this = self.expression( 5049 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5050 ) 5051 else: 5052 this = self._parse_select_or_expression(alias=alias) 5053 5054 return self._parse_limit( 5055 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5056 ) 5057 5058 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5059 index = self._index 5060 if not self._match(TokenType.L_PAREN): 5061 return this 5062 5063 # Disambiguate between schema and 
    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type and constraints following a column name.

        Returns `this` unchanged when no type and no constraints follow,
        otherwise an `exp.ColumnDef` wrapping the parsed pieces.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        # Computed column: "<col> AS <expr>" (no type) or explicit
        # ALIAS/MATERIALIZED keyword forms.
        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        # Typed transform column: "<col> <type> AS (<expr>)".
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        # Collect any remaining column constraints (NOT NULL, DEFAULT, ...).
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5119 start = None 5120 increment = None 5121 5122 if self._match(TokenType.L_PAREN, advance=False): 5123 args = self._parse_wrapped_csv(self._parse_bitwise) 5124 start = seq_get(args, 0) 5125 increment = seq_get(args, 1) 5126 elif self._match_text_seq("START"): 5127 start = self._parse_bitwise() 5128 self._match_text_seq("INCREMENT") 5129 increment = self._parse_bitwise() 5130 5131 if start and increment: 5132 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5133 5134 return exp.AutoIncrementColumnConstraint() 5135 5136 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5137 if not self._match_text_seq("REFRESH"): 5138 self._retreat(self._index - 1) 5139 return None 5140 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5141 5142 def _parse_compress(self) -> exp.CompressColumnConstraint: 5143 if self._match(TokenType.L_PAREN, advance=False): 5144 return self.expression( 5145 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5146 ) 5147 5148 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5149 5150 def _parse_generated_as_identity( 5151 self, 5152 ) -> ( 5153 exp.GeneratedAsIdentityColumnConstraint 5154 | exp.ComputedColumnConstraint 5155 | exp.GeneratedAsRowColumnConstraint 5156 ): 5157 if self._match_text_seq("BY", "DEFAULT"): 5158 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5159 this = self.expression( 5160 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5161 ) 5162 else: 5163 self._match_text_seq("ALWAYS") 5164 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5165 5166 self._match(TokenType.ALIAS) 5167 5168 if self._match_text_seq("ROW"): 5169 start = self._match_text_seq("START") 5170 if not start: 5171 self._match(TokenType.END) 5172 hidden = self._match_text_seq("HIDDEN") 5173 
return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5174 5175 identity = self._match_text_seq("IDENTITY") 5176 5177 if self._match(TokenType.L_PAREN): 5178 if self._match(TokenType.START_WITH): 5179 this.set("start", self._parse_bitwise()) 5180 if self._match_text_seq("INCREMENT", "BY"): 5181 this.set("increment", self._parse_bitwise()) 5182 if self._match_text_seq("MINVALUE"): 5183 this.set("minvalue", self._parse_bitwise()) 5184 if self._match_text_seq("MAXVALUE"): 5185 this.set("maxvalue", self._parse_bitwise()) 5186 5187 if self._match_text_seq("CYCLE"): 5188 this.set("cycle", True) 5189 elif self._match_text_seq("NO", "CYCLE"): 5190 this.set("cycle", False) 5191 5192 if not identity: 5193 this.set("expression", self._parse_range()) 5194 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5195 args = self._parse_csv(self._parse_bitwise) 5196 this.set("start", seq_get(args, 0)) 5197 this.set("increment", seq_get(args, 1)) 5198 5199 self._match_r_paren() 5200 5201 return this 5202 5203 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5204 self._match_text_seq("LENGTH") 5205 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5206 5207 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5208 if self._match_text_seq("NULL"): 5209 return self.expression(exp.NotNullColumnConstraint) 5210 if self._match_text_seq("CASESPECIFIC"): 5211 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5212 if self._match_text_seq("FOR", "REPLICATION"): 5213 return self.expression(exp.NotForReplicationColumnConstraint) 5214 return None 5215 5216 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5217 if self._match(TokenType.CONSTRAINT): 5218 this = self._parse_id_var() 5219 else: 5220 this = None 5221 5222 if self._match_texts(self.CONSTRAINT_PARSERS): 5223 return self.expression( 5224 exp.ColumnConstraint, 5225 this=this, 5226 
kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5227 ) 5228 5229 return this 5230 5231 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5232 if not self._match(TokenType.CONSTRAINT): 5233 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5234 5235 return self.expression( 5236 exp.Constraint, 5237 this=self._parse_id_var(), 5238 expressions=self._parse_unnamed_constraints(), 5239 ) 5240 5241 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5242 constraints = [] 5243 while True: 5244 constraint = self._parse_unnamed_constraint() or self._parse_function() 5245 if not constraint: 5246 break 5247 constraints.append(constraint) 5248 5249 return constraints 5250 5251 def _parse_unnamed_constraint( 5252 self, constraints: t.Optional[t.Collection[str]] = None 5253 ) -> t.Optional[exp.Expression]: 5254 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5255 constraints or self.CONSTRAINT_PARSERS 5256 ): 5257 return None 5258 5259 constraint = self._prev.text.upper() 5260 if constraint not in self.CONSTRAINT_PARSERS: 5261 self.raise_error(f"No parser found for schema constraint {constraint}.") 5262 5263 return self.CONSTRAINT_PARSERS[constraint](self) 5264 5265 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5266 return self._parse_id_var(any_token=False) 5267 5268 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5269 self._match_text_seq("KEY") 5270 return self.expression( 5271 exp.UniqueColumnConstraint, 5272 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5273 this=self._parse_schema(self._parse_unique_key()), 5274 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5275 on_conflict=self._parse_on_conflict(), 5276 ) 5277 5278 def _parse_key_constraint_options(self) -> t.List[str]: 5279 options = [] 5280 while True: 5281 if not self._curr: 5282 break 5283 5284 if self._match(TokenType.ON): 5285 action = None 5286 on = 
self._advance_any() and self._prev.text 5287 5288 if self._match_text_seq("NO", "ACTION"): 5289 action = "NO ACTION" 5290 elif self._match_text_seq("CASCADE"): 5291 action = "CASCADE" 5292 elif self._match_text_seq("RESTRICT"): 5293 action = "RESTRICT" 5294 elif self._match_pair(TokenType.SET, TokenType.NULL): 5295 action = "SET NULL" 5296 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5297 action = "SET DEFAULT" 5298 else: 5299 self.raise_error("Invalid key constraint") 5300 5301 options.append(f"ON {on} {action}") 5302 else: 5303 var = self._parse_var_from_options( 5304 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5305 ) 5306 if not var: 5307 break 5308 options.append(var.name) 5309 5310 return options 5311 5312 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5313 if match and not self._match(TokenType.REFERENCES): 5314 return None 5315 5316 expressions = None 5317 this = self._parse_table(schema=True) 5318 options = self._parse_key_constraint_options() 5319 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5320 5321 def _parse_foreign_key(self) -> exp.ForeignKey: 5322 expressions = self._parse_wrapped_id_vars() 5323 reference = self._parse_references() 5324 options = {} 5325 5326 while self._match(TokenType.ON): 5327 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5328 self.raise_error("Expected DELETE or UPDATE") 5329 5330 kind = self._prev.text.lower() 5331 5332 if self._match_text_seq("NO", "ACTION"): 5333 action = "NO ACTION" 5334 elif self._match(TokenType.SET): 5335 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5336 action = "SET " + self._prev.text.upper() 5337 else: 5338 self._advance() 5339 action = self._prev.text.upper() 5340 5341 options[kind] = action 5342 5343 return self.expression( 5344 exp.ForeignKey, 5345 expressions=expressions, 5346 reference=reference, 5347 **options, # type: ignore 5348 ) 5349 5350 def _parse_primary_key_part(self) -> 
t.Optional[exp.Expression]: 5351 return self._parse_field() 5352 5353 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5354 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5355 self._retreat(self._index - 1) 5356 return None 5357 5358 id_vars = self._parse_wrapped_id_vars() 5359 return self.expression( 5360 exp.PeriodForSystemTimeConstraint, 5361 this=seq_get(id_vars, 0), 5362 expression=seq_get(id_vars, 1), 5363 ) 5364 5365 def _parse_primary_key( 5366 self, wrapped_optional: bool = False, in_props: bool = False 5367 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5368 desc = ( 5369 self._match_set((TokenType.ASC, TokenType.DESC)) 5370 and self._prev.token_type == TokenType.DESC 5371 ) 5372 5373 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5374 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5375 5376 expressions = self._parse_wrapped_csv( 5377 self._parse_primary_key_part, optional=wrapped_optional 5378 ) 5379 options = self._parse_key_constraint_options() 5380 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5381 5382 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5383 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5384 5385 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5386 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5387 return this 5388 5389 bracket_kind = self._prev.token_type 5390 expressions = self._parse_csv( 5391 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5392 ) 5393 5394 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5395 self.raise_error("Expected ]") 5396 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5397 self.raise_error("Expected }") 5398 5399 # 
    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [<operand>] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple" CASE form (CASE x WHEN ...).
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # "ELSE interval END" parses the closing END as an interval unit,
            # leaving an exp.Interval whose unit renders as "END"; undo that by
            # treating the default as a column named "interval".
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
5450 self._match_r_paren() 5451 else: 5452 index = self._index - 1 5453 5454 if self.NO_PAREN_IF_COMMANDS and index == 0: 5455 return self._parse_as_command(self._prev) 5456 5457 condition = self._parse_assignment() 5458 5459 if not condition: 5460 self._retreat(index) 5461 return None 5462 5463 self._match(TokenType.THEN) 5464 true = self._parse_assignment() 5465 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5466 self._match(TokenType.END) 5467 this = self.expression(exp.If, this=condition, true=true, false=false) 5468 5469 return this 5470 5471 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5472 if not self._match_text_seq("VALUE", "FOR"): 5473 self._retreat(self._index - 1) 5474 return None 5475 5476 return self.expression( 5477 exp.NextValueFor, 5478 this=self._parse_column(), 5479 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5480 ) 5481 5482 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5483 this = self._parse_function() or self._parse_var_or_string(upper=True) 5484 5485 if self._match(TokenType.FROM): 5486 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5487 5488 if not self._match(TokenType.COMMA): 5489 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5490 5491 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5492 5493 def _parse_gap_fill(self) -> exp.GapFill: 5494 self._match(TokenType.TABLE) 5495 this = self._parse_table() 5496 5497 self._match(TokenType.COMMA) 5498 args = [this, *self._parse_csv(self._parse_lambda)] 5499 5500 gap_fill = exp.GapFill.from_arg_list(args) 5501 return self.validate_expression(gap_fill, args) 5502 5503 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5504 this = self._parse_assignment() 5505 5506 if not self._match(TokenType.ALIAS): 5507 if self._match(TokenType.COMMA): 5508 return self.expression(exp.CastToStrType, 
this=this, to=self._parse_string()) 5509 5510 self.raise_error("Expected AS after CAST") 5511 5512 fmt = None 5513 to = self._parse_types() 5514 5515 if self._match(TokenType.FORMAT): 5516 fmt_string = self._parse_string() 5517 fmt = self._parse_at_time_zone(fmt_string) 5518 5519 if not to: 5520 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5521 if to.this in exp.DataType.TEMPORAL_TYPES: 5522 this = self.expression( 5523 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5524 this=this, 5525 format=exp.Literal.string( 5526 format_time( 5527 fmt_string.this if fmt_string else "", 5528 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5529 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5530 ) 5531 ), 5532 safe=safe, 5533 ) 5534 5535 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5536 this.set("zone", fmt.args["zone"]) 5537 return this 5538 elif not to: 5539 self.raise_error("Expected TYPE after CAST") 5540 elif isinstance(to, exp.Identifier): 5541 to = exp.DataType.build(to.name, udt=True) 5542 elif to.this == exp.DataType.Type.CHAR: 5543 if self._match(TokenType.CHARACTER_SET): 5544 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5545 5546 return self.expression( 5547 exp.Cast if strict else exp.TryCast, 5548 this=this, 5549 to=to, 5550 format=fmt, 5551 safe=safe, 5552 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5553 ) 5554 5555 def _parse_string_agg(self) -> exp.Expression: 5556 if self._match(TokenType.DISTINCT): 5557 args: t.List[t.Optional[exp.Expression]] = [ 5558 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5559 ] 5560 if self._match(TokenType.COMMA): 5561 args.extend(self._parse_csv(self._parse_assignment)) 5562 else: 5563 args = self._parse_csv(self._parse_assignment) # type: ignore 5564 5565 index = self._index 5566 if not self._match(TokenType.R_PAREN) and args: 5567 # postgres: STRING_AGG([DISTINCT] 
expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5568 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5569 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5570 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5571 5572 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5573 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5574 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5575 if not self._match_text_seq("WITHIN", "GROUP"): 5576 self._retreat(index) 5577 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5578 5579 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5580 order = self._parse_order(this=seq_get(args, 0)) 5581 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5582 5583 def _parse_convert( 5584 self, strict: bool, safe: t.Optional[bool] = None 5585 ) -> t.Optional[exp.Expression]: 5586 this = self._parse_bitwise() 5587 5588 if self._match(TokenType.USING): 5589 to: t.Optional[exp.Expression] = self.expression( 5590 exp.CharacterSet, this=self._parse_var() 5591 ) 5592 elif self._match(TokenType.COMMA): 5593 to = self._parse_types() 5594 else: 5595 to = None 5596 5597 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5598 5599 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5600 """ 5601 There are generally two variants of the DECODE function: 5602 5603 - DECODE(bin, charset) 5604 - DECODE(expression, search, result [, search, result] ... [, default]) 5605 5606 The second variant will always be parsed into a CASE expression. 
Note that NULL 5607 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5608 instead of relying on pattern matching. 5609 """ 5610 args = self._parse_csv(self._parse_assignment) 5611 5612 if len(args) < 3: 5613 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5614 5615 expression, *expressions = args 5616 if not expression: 5617 return None 5618 5619 ifs = [] 5620 for search, result in zip(expressions[::2], expressions[1::2]): 5621 if not search or not result: 5622 return None 5623 5624 if isinstance(search, exp.Literal): 5625 ifs.append( 5626 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5627 ) 5628 elif isinstance(search, exp.Null): 5629 ifs.append( 5630 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5631 ) 5632 else: 5633 cond = exp.or_( 5634 exp.EQ(this=expression.copy(), expression=search), 5635 exp.and_( 5636 exp.Is(this=expression.copy(), expression=exp.Null()), 5637 exp.Is(this=search.copy(), expression=exp.Null()), 5638 copy=False, 5639 ), 5640 copy=False, 5641 ) 5642 ifs.append(exp.If(this=cond, true=result)) 5643 5644 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5645 5646 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5647 self._match_text_seq("KEY") 5648 key = self._parse_column() 5649 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5650 self._match_text_seq("VALUE") 5651 value = self._parse_bitwise() 5652 5653 if not key and not value: 5654 return None 5655 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5656 5657 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5658 if not this or not self._match_text_seq("FORMAT", "JSON"): 5659 return this 5660 5661 return self.expression(exp.FormatJson, this=this) 5662 5663 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5664 # Parses 
the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5665 for value in values: 5666 if self._match_text_seq(value, "ON", on): 5667 return f"{value} ON {on}" 5668 5669 return None 5670 5671 @t.overload 5672 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5673 5674 @t.overload 5675 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5676 5677 def _parse_json_object(self, agg=False): 5678 star = self._parse_star() 5679 expressions = ( 5680 [star] 5681 if star 5682 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5683 ) 5684 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5685 5686 unique_keys = None 5687 if self._match_text_seq("WITH", "UNIQUE"): 5688 unique_keys = True 5689 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5690 unique_keys = False 5691 5692 self._match_text_seq("KEYS") 5693 5694 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5695 self._parse_type() 5696 ) 5697 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5698 5699 return self.expression( 5700 exp.JSONObjectAgg if agg else exp.JSONObject, 5701 expressions=expressions, 5702 null_handling=null_handling, 5703 unique_keys=unique_keys, 5704 return_type=return_type, 5705 encoding=encoding, 5706 ) 5707 5708 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5709 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5710 if not self._match_text_seq("NESTED"): 5711 this = self._parse_id_var() 5712 kind = self._parse_types(allow_identifiers=False) 5713 nested = None 5714 else: 5715 this = None 5716 kind = None 5717 nested = True 5718 5719 path = self._match_text_seq("PATH") and self._parse_string() 5720 nested_schema = nested and self._parse_json_schema() 5721 5722 return self.expression( 5723 exp.JSONColumnDef, 5724 this=this, 5725 kind=kind, 5726 path=path, 5727 nested_schema=nested_schema, 5728 ) 5729 5730 def 
_parse_json_schema(self) -> exp.JSONSchema: 5731 self._match_text_seq("COLUMNS") 5732 return self.expression( 5733 exp.JSONSchema, 5734 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5735 ) 5736 5737 def _parse_json_table(self) -> exp.JSONTable: 5738 this = self._parse_format_json(self._parse_bitwise()) 5739 path = self._match(TokenType.COMMA) and self._parse_string() 5740 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5741 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5742 schema = self._parse_json_schema() 5743 5744 return exp.JSONTable( 5745 this=this, 5746 schema=schema, 5747 path=path, 5748 error_handling=error_handling, 5749 empty_handling=empty_handling, 5750 ) 5751 5752 def _parse_match_against(self) -> exp.MatchAgainst: 5753 expressions = self._parse_csv(self._parse_column) 5754 5755 self._match_text_seq(")", "AGAINST", "(") 5756 5757 this = self._parse_string() 5758 5759 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5760 modifier = "IN NATURAL LANGUAGE MODE" 5761 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5762 modifier = f"{modifier} WITH QUERY EXPANSION" 5763 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5764 modifier = "IN BOOLEAN MODE" 5765 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5766 modifier = "WITH QUERY EXPANSION" 5767 else: 5768 modifier = None 5769 5770 return self.expression( 5771 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5772 ) 5773 5774 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5775 def _parse_open_json(self) -> exp.OpenJSON: 5776 this = self._parse_bitwise() 5777 path = self._match(TokenType.COMMA) and self._parse_string() 5778 5779 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5780 this = self._parse_field(any_token=True) 5781 kind = self._parse_types() 5782 path = self._parse_string() 5783 as_json = 
self._match_pair(TokenType.ALIAS, TokenType.JSON) 5784 5785 return self.expression( 5786 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5787 ) 5788 5789 expressions = None 5790 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5791 self._match_l_paren() 5792 expressions = self._parse_csv(_parse_open_json_column_def) 5793 5794 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5795 5796 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5797 args = self._parse_csv(self._parse_bitwise) 5798 5799 if self._match(TokenType.IN): 5800 return self.expression( 5801 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5802 ) 5803 5804 if haystack_first: 5805 haystack = seq_get(args, 0) 5806 needle = seq_get(args, 1) 5807 else: 5808 needle = seq_get(args, 0) 5809 haystack = seq_get(args, 1) 5810 5811 return self.expression( 5812 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5813 ) 5814 5815 def _parse_predict(self) -> exp.Predict: 5816 self._match_text_seq("MODEL") 5817 this = self._parse_table() 5818 5819 self._match(TokenType.COMMA) 5820 self._match_text_seq("TABLE") 5821 5822 return self.expression( 5823 exp.Predict, 5824 this=this, 5825 expression=self._parse_table(), 5826 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5827 ) 5828 5829 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5830 args = self._parse_csv(self._parse_table) 5831 return exp.JoinHint(this=func_name.upper(), expressions=args) 5832 5833 def _parse_substring(self) -> exp.Substring: 5834 # Postgres supports the form: substring(string [from int] [for int]) 5835 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5836 5837 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5838 5839 if self._match(TokenType.FROM): 5840 args.append(self._parse_bitwise()) 5841 if self._match(TokenType.FOR): 
5842 if len(args) == 1: 5843 args.append(exp.Literal.number(1)) 5844 args.append(self._parse_bitwise()) 5845 5846 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5847 5848 def _parse_trim(self) -> exp.Trim: 5849 # https://www.w3resource.com/sql/character-functions/trim.php 5850 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5851 5852 position = None 5853 collation = None 5854 expression = None 5855 5856 if self._match_texts(self.TRIM_TYPES): 5857 position = self._prev.text.upper() 5858 5859 this = self._parse_bitwise() 5860 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5861 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5862 expression = self._parse_bitwise() 5863 5864 if invert_order: 5865 this, expression = expression, this 5866 5867 if self._match(TokenType.COLLATE): 5868 collation = self._parse_bitwise() 5869 5870 return self.expression( 5871 exp.Trim, this=this, position=position, expression=expression, collation=collation 5872 ) 5873 5874 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5875 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5876 5877 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5878 return self._parse_window(self._parse_id_var(), alias=True) 5879 5880 def _parse_respect_or_ignore_nulls( 5881 self, this: t.Optional[exp.Expression] 5882 ) -> t.Optional[exp.Expression]: 5883 if self._match_text_seq("IGNORE", "NULLS"): 5884 return self.expression(exp.IgnoreNulls, this=this) 5885 if self._match_text_seq("RESPECT", "NULLS"): 5886 return self.expression(exp.RespectNulls, this=this) 5887 return this 5888 5889 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5890 if self._match(TokenType.HAVING): 5891 self._match_texts(("MAX", "MIN")) 5892 max = self._prev.text.upper() != "MIN" 5893 return self.expression( 5894 exp.HavingMax, this=this, 
expression=self._parse_column(), max=max
            )

        return this

    # Parses the trailing window-function syntax after a function call: FILTER (...),
    # WITHIN GROUP (...), IGNORE/RESPECT NULLS and the OVER clause itself.
    # With alias=True this parses a named-window definition (name AS (...)) instead.
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        # Comments attached to the function are preserved so they can be
        # re-attached to the resulting Window node below.
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist IGNORE/RESPECT NULLS from inside the aggregate's argument
                # so it wraps the aggregate itself.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like token follows, so there is no window specification.
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            # The comments now live on the Window node; drop them from the function.
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <identifier> form: the window references a named window.
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # FIRST/LAST (Oracle KEEP syntax); LAST overrides a previously-seen FIRST.
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS/RANGE BETWEEN <spec> AND <spec> frame definition.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    # Hook point: parses the PARTITION BY and ORDER BY parts of a window spec.
    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    # Parses one window-frame bound, e.g. UNBOUNDED PRECEDING or CURRENT ROW,
    # returning its value and side (PRECEDING/FOLLOWING) separately.
    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    # Parses an alias after `this`: [AS] name, or [AS] (a, b, ...) for multi-aliases.
    # With explicit=True, the AS keyword is required for an alias to be parsed.
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # After an explicit AS, any token may serve as the alias name.
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments =
column.pop_comments()

        return this

    # Parses an identifier-like name. Falls back to treating (almost) any token as
    # an identifier when any_token is True, or to the given/default token set.
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            # A string token used as an identifier keeps its quoted status.
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    # Parses a string literal via the dialect's registered string parsers.
    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    # Parses a string literal and converts it into a quoted identifier.
    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    # Parses a numeric literal via the dialect's registered numeric parsers.
    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    # Parses a quoted identifier token.
    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    # Parses a bare keyword/variable into exp.Var; `upper` normalizes its casing.
    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    # Consumes the current token unconditionally, except reserved tokens unless
    # ignore_reserved is set; returns the consumed token or None.
    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and
(ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    # Parses a string literal, falling back to a Var from any token.
    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    # Parses a primary expression, falling back to a Var from any token.
    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    # Parses a NULL literal via the registered NULL primary parser.
    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    # Parses a TRUE/FALSE literal via the registered boolean primary parsers.
    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    # Parses a star (*) via the registered star primary parser.
    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    # Parses a parameter name into exp.Parameter.
    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    # Parses a placeholder (e.g. ?, :name) via the dialect's placeholder parsers;
    # rewinds if the matched parser ultimately produced nothing.
    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    # Parses `KEYWORD expr` or `KEYWORD (expr, ...)` star modifiers (e.g. EXCEPT/REPLACE).
    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None
    # Parses a separator-delimited list of items produced by parse_method,
    # skipping None results.
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments trailing the separator are attached to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    # Left-associatively folds `a OP b OP c ...` where each matched token type maps
    # to a binary expression class in `expressions`.
    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    # Parses a parenthesized, comma-separated list of identifiers.
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    # Parses a parenthesized, separator-delimited list; parens optional if `optional`.
    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    # Runs parse_method inside ( ... ); the parens may be omitted when `optional`.
    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    # Parses a comma-separated list of expressions.
    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    # Parses either a full SELECT or a plain (possibly set-op-combined) expression.
    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    # Parses the SELECT used inside DDL statements (e.g. CREATE TABLE ... AS).
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    # Parses BEGIN/START TRANSACTION [modes...] into exp.Transaction.
    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Each mode is a run of VAR tokens joined by spaces; modes are comma-separated.
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))

            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    # Parses COMMIT/ROLLBACK, including ROLLBACK TO SAVEPOINT and AND [NO] CHAIN.
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK token itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): `chain` is parsed but only attached to Commit — confirm
        # Rollback is intended to ignore AND [NO] CHAIN.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    # Parses REFRESH [TABLE] <name-or-string> into exp.Refresh.
    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    # Parses one ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER col] entry.
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position =
self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    # Parses a DROP [COLUMN ...] action; defaults the drop kind to COLUMN.
    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    # Parses the actions of ALTER TABLE ... ADD: constraints, columns, or a schema.
    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        # _index - 1 points back at the already-consumed ADD token.
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    # Parses ALTER TABLE ... ALTER [COLUMN] actions into exp.AlterColumn (or a
    # dialect-specific node via ALTER_ALTER_PARSERS).
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return
self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fallback: [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    # Parses Redshift's ALTER DISTSTYLE { ALL | EVEN | AUTO | KEY DISTKEY col }.
    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    # Parses Redshift's ALTER [COMPOUND] SORTKEY { (cols) | AUTO | NONE }.
    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    # Parses ALTER TABLE ... DROP actions: partitions or columns.
    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        # _index - 1 points back at the already-consumed DROP token.
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION,
advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    # Parses ALTER TABLE ... RENAME [COLUMN old TO new | TO new_table].
    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            # All three pieces are required for a valid column rename.
            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    # Parses the many dialect-specific forms of ALTER TABLE ... SET <option>.
    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    # Parses an ALTER TABLE statement, dispatching the action via ALTER_PARSERS;
    # anything that cannot be fully consumed falls back to a raw exp.Command.
    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only accept the parse when all tokens were consumed; otherwise
            # fall through to the command fallback below.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    # Parses MERGE INTO target USING source ON condition WHEN ... clauses.
    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    # Parses the WHEN [NOT] MATCHED [BY TARGET|SOURCE] ... THEN ... clauses of MERGE.
    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, truthy for BY SOURCE, falsy otherwise.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    # INSERT (cols) VALUES (...)
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    # Parses a SHOW statement via SHOW_PARSERS, else falls back to a raw Command.
    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    # Parses one `name = value` (or `name TO value`) item of a SET statement.
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if
not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        # Bare names on the right-hand side are treated as plain values, not columns.
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    # Parses SET [GLOBAL|SESSION] TRANSACTION <characteristics, ...>.
    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, so it is passed via a dict.
            **{"global": global_},  # type: ignore
        )

    # Parses one SET item, dispatching via SET_PARSERS when a keyword matches.
    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    # Parses a SET statement; falls back to a raw Command when not fully consumed.
    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    # Parses a (possibly multi-word) option from the given OPTIONS_TYPE table into
    # exp.Var. Unknown options raise unless raise_unmatched is False.
    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched; a lone keyword is only valid when the
            # option maps to an empty continuation sequence.
            if continuations or continuations is None:
                if
raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    # Consumes all remaining tokens and wraps the raw SQL text in exp.Command,
    # warning that the statement is unsupported.
    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the leading keyword off from the rest of the statement.
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    # Parses a ClickHouse-style dictionary property: NAME(kind(key value ...)).
    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    # Parses a dictionary range: NAME(MIN x MAX y) or NAME(y) with MIN defaulting to 0.
    # NOTE(review): `min`/`max` shadow the builtins inside this method.
    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    # Parses a list comprehension tail: <expr> FOR x IN y [IF cond]; rewinds and
    # returns None when IN does not follow.
    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    # Parses a dollar-quoted heredoc string ($tag$ ... $tag$ or $$ ... $$).
    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        # The opening delimiter must be contiguous tokens (no whitespace gaps).
        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        # Scan forward until the full closing delimiter sequence is found.
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    # Greedily matches upcoming tokens against a keyword trie and returns the
    # corresponding parser from `parsers`, rewinding on failure.
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    # Returns True (and by default advances) if the current token has the given type.
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
                self._add_comments(expression)
            return True

        return None
    # Returns True (and by default advances) if the current token type is in `types`.
    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    # Returns True (and by default advances past both) if the next two tokens have
    # the given types, in order.
    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    # Requires an opening paren, raising a ParseError otherwise.
    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    # Requires a closing paren, raising a ParseError otherwise.
    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    # Returns True (and by default advances) if the current token's upper-cased
    # text is in `texts`.
    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    # Matches the given keyword sequence case-insensitively; rewinds entirely on a
    # partial match, and also rewinds (after confirming) when advance=False.
    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    # Rewrites lambda-parameter references inside `node`: columns named after a
    # lambda parameter become plain identifiers/dots, optionally cast to the
    # parameter's declared type.
    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        # Maps parameter name -> declared type (or False when untyped).
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent =
column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Replace the outermost Dot of the chain with the rewrite.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    # Parses TRUNCATE [DATABASE|TABLE] ... with its optional identity/cascade
    # clauses; falls back to a function call or a raw Command when appropriate.
    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    # Parses `<ordered expr> [WITH <operator>]` (e.g. Postgres exclusion constraints).
    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator,
this=this, op=op) 6907 6908 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6909 self._match(TokenType.EQ) 6910 self._match(TokenType.L_PAREN) 6911 6912 opts: t.List[t.Optional[exp.Expression]] = [] 6913 while self._curr and not self._match(TokenType.R_PAREN): 6914 if self._match_text_seq("FORMAT_NAME", "="): 6915 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6916 # so we parse it separately to use _parse_field() 6917 prop = self.expression( 6918 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6919 ) 6920 opts.append(prop) 6921 else: 6922 opts.append(self._parse_property()) 6923 6924 self._match(TokenType.COMMA) 6925 6926 return opts 6927 6928 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6929 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6930 6931 options = [] 6932 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6933 option = self._parse_var(any_token=True) 6934 prev = self._prev.text.upper() 6935 6936 # Different dialects might separate options and values by white space, "=" and "AS" 6937 self._match(TokenType.EQ) 6938 self._match(TokenType.ALIAS) 6939 6940 param = self.expression(exp.CopyParameter, this=option) 6941 6942 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6943 TokenType.L_PAREN, advance=False 6944 ): 6945 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6946 param.set("expressions", self._parse_wrapped_options()) 6947 elif prev == "FILE_FORMAT": 6948 # T-SQL's external file format case 6949 param.set("expression", self._parse_field()) 6950 else: 6951 param.set("expression", self._parse_unquoted_field()) 6952 6953 options.append(param) 6954 self._match(sep) 6955 6956 return options 6957 6958 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6959 expr = self.expression(exp.Credentials) 6960 6961 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6962 expr.set("storage", 
self._parse_field()) 6963 if self._match_text_seq("CREDENTIALS"): 6964 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6965 creds = ( 6966 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6967 ) 6968 expr.set("credentials", creds) 6969 if self._match_text_seq("ENCRYPTION"): 6970 expr.set("encryption", self._parse_wrapped_options()) 6971 if self._match_text_seq("IAM_ROLE"): 6972 expr.set("iam_role", self._parse_field()) 6973 if self._match_text_seq("REGION"): 6974 expr.set("region", self._parse_field()) 6975 6976 return expr 6977 6978 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6979 return self._parse_field() 6980 6981 def _parse_copy(self) -> exp.Copy | exp.Command: 6982 start = self._prev 6983 6984 self._match(TokenType.INTO) 6985 6986 this = ( 6987 self._parse_select(nested=True, parse_subquery_alias=False) 6988 if self._match(TokenType.L_PAREN, advance=False) 6989 else self._parse_table(schema=True) 6990 ) 6991 6992 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6993 6994 files = self._parse_csv(self._parse_file_location) 6995 credentials = self._parse_credentials() 6996 6997 self._match_text_seq("WITH") 6998 6999 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7000 7001 # Fallback case 7002 if self._curr: 7003 return self._parse_as_command(start) 7004 7005 return self.expression( 7006 exp.Copy, 7007 this=this, 7008 kind=kind, 7009 credentials=credentials, 7010 files=files, 7011 params=params, 7012 )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat argument list.

    A single star argument produces a ``StarMap``; otherwise the arguments are
    interpreted as alternating key/value pairs and packed into a ``VarMap``.
    An odd number of non-star arguments is malformed and raises ``IndexError``,
    matching the pairwise indexing below.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Even positions are keys, odd positions are the corresponding values.
    keys = [args[index] for index in range(0, len(args), 2)]
    values = [args[index + 1] for index in range(0, len(args), 2)]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Create a range-operator parser callback for ``expr_type``.

    The returned callback parses the right-hand side with ``_parse_bitwise``,
    builds ``expr_type(this=..., expression=...)`` and then lets
    ``_parse_escape`` attach an optional ESCAPE clause. With ``reverse_args``
    the two operands are swapped before the node is built.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        lhs, rhs = this, self._parse_bitwise()
        if reverse_args:
            lhs, rhs = rhs, lhs

        node = self.expression(expr_type, this=lhs, expression=rhs)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a ``Log`` (or ``Ln``) node from the arguments of a LOG() call.

    With two arguments the default order is (base, expression); dialects whose
    ``LOG_BASE_FIRST`` is false pass them the other way around, so the operands
    are swapped for those. A single-argument LOG becomes ``Ln`` when the
    dialect's parser declares ``LOG_DEFAULTS_TO_LN``, otherwise a one-arg ``Log``.
    """
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if not second:
        # Single-argument form: natural log for LOG_DEFAULTS_TO_LN dialects.
        single_arg_type = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return single_arg_type(this=first)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=first, expression=second)

    return exp.Log(this=second, expression=first)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Create a builder for JSON-extraction functions of type ``expr_type``.

    The builder converts the second argument into a dialect-specific JSON path
    via ``dialect.to_json_path``. Any arguments beyond the first two are kept
    as extra ``expressions``, but only for ``exp.JSONExtract`` nodes.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )

        trailing = args[2:]
        if trailing and expr_type is exp.JSONExtract:
            node.set("expressions", trailing)

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a ``Mod`` node from MOD(x, y) arguments.

    Binary operands are parenthesized so precedence survives transpilation to
    the ``%`` operator, e.g. MOD(a + 1, 7) -> (a + 1) % 7.
    """

    def _wrap_binary(node: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Only binary nodes need explicit grouping; leave everything else as-is.
        return exp.Paren(this=node) if isinstance(node, exp.Binary) else node

    return exp.Mod(
        this=_wrap_binary(seq_get(args, 0)),
        expression=_wrap_binary(seq_get(args, 1)),
    )
131class Parser(metaclass=_Parser): 132 """ 133 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 134 135 Args: 136 error_level: The desired error level. 137 Default: ErrorLevel.IMMEDIATE 138 error_message_context: The amount of context to capture from a query string when displaying 139 the error message (in number of characters). 140 Default: 100 141 max_errors: Maximum number of error messages to include in a raised ParseError. 142 This is only relevant if error_level is ErrorLevel.RAISE. 143 Default: 3 144 """ 145 146 FUNCTIONS: t.Dict[str, t.Callable] = { 147 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 148 "CONCAT": lambda args, dialect: exp.Concat( 149 expressions=args, 150 safe=not dialect.STRICT_STRING_CONCAT, 151 coalesce=dialect.CONCAT_COALESCE, 152 ), 153 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 154 expressions=args, 155 safe=not dialect.STRICT_STRING_CONCAT, 156 coalesce=dialect.CONCAT_COALESCE, 157 ), 158 "DATE_TO_DATE_STR": lambda args: exp.Cast( 159 this=seq_get(args, 0), 160 to=exp.DataType(this=exp.DataType.Type.TEXT), 161 ), 162 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 163 "HEX": build_hex, 164 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 165 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 166 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 167 "LIKE": build_like, 168 "LOG": build_logarithm, 169 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 170 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 171 "LOWER": build_lower, 172 "LPAD": lambda args: build_pad(args), 173 "LEFTPAD": lambda args: build_pad(args), 174 "MOD": build_mod, 175 "RPAD": lambda args: build_pad(args, is_left=False), 176 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 177 "SCOPE_RESOLUTION": 
lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 178 if len(args) != 2 179 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 180 "TIME_TO_TIME_STR": lambda args: exp.Cast( 181 this=seq_get(args, 0), 182 to=exp.DataType(this=exp.DataType.Type.TEXT), 183 ), 184 "TO_HEX": build_hex, 185 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 186 this=exp.Cast( 187 this=seq_get(args, 0), 188 to=exp.DataType(this=exp.DataType.Type.TEXT), 189 ), 190 start=exp.Literal.number(1), 191 length=exp.Literal.number(10), 192 ), 193 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 194 "UPPER": build_upper, 195 "VAR_MAP": build_var_map, 196 } 197 198 NO_PAREN_FUNCTIONS = { 199 TokenType.CURRENT_DATE: exp.CurrentDate, 200 TokenType.CURRENT_DATETIME: exp.CurrentDate, 201 TokenType.CURRENT_TIME: exp.CurrentTime, 202 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 203 TokenType.CURRENT_USER: exp.CurrentUser, 204 } 205 206 STRUCT_TYPE_TOKENS = { 207 TokenType.NESTED, 208 TokenType.OBJECT, 209 TokenType.STRUCT, 210 } 211 212 NESTED_TYPE_TOKENS = { 213 TokenType.ARRAY, 214 TokenType.LIST, 215 TokenType.LOWCARDINALITY, 216 TokenType.MAP, 217 TokenType.NULLABLE, 218 *STRUCT_TYPE_TOKENS, 219 } 220 221 ENUM_TYPE_TOKENS = { 222 TokenType.ENUM, 223 TokenType.ENUM8, 224 TokenType.ENUM16, 225 } 226 227 AGGREGATE_TYPE_TOKENS = { 228 TokenType.AGGREGATEFUNCTION, 229 TokenType.SIMPLEAGGREGATEFUNCTION, 230 } 231 232 TYPE_TOKENS = { 233 TokenType.BIT, 234 TokenType.BOOLEAN, 235 TokenType.TINYINT, 236 TokenType.UTINYINT, 237 TokenType.SMALLINT, 238 TokenType.USMALLINT, 239 TokenType.INT, 240 TokenType.UINT, 241 TokenType.BIGINT, 242 TokenType.UBIGINT, 243 TokenType.INT128, 244 TokenType.UINT128, 245 TokenType.INT256, 246 TokenType.UINT256, 247 TokenType.MEDIUMINT, 248 TokenType.UMEDIUMINT, 249 TokenType.FIXEDSTRING, 250 TokenType.FLOAT, 251 TokenType.DOUBLE, 252 TokenType.CHAR, 253 TokenType.NCHAR, 254 TokenType.VARCHAR, 255 
TokenType.NVARCHAR, 256 TokenType.BPCHAR, 257 TokenType.TEXT, 258 TokenType.MEDIUMTEXT, 259 TokenType.LONGTEXT, 260 TokenType.MEDIUMBLOB, 261 TokenType.LONGBLOB, 262 TokenType.BINARY, 263 TokenType.VARBINARY, 264 TokenType.JSON, 265 TokenType.JSONB, 266 TokenType.INTERVAL, 267 TokenType.TINYBLOB, 268 TokenType.TINYTEXT, 269 TokenType.TIME, 270 TokenType.TIMETZ, 271 TokenType.TIMESTAMP, 272 TokenType.TIMESTAMP_S, 273 TokenType.TIMESTAMP_MS, 274 TokenType.TIMESTAMP_NS, 275 TokenType.TIMESTAMPTZ, 276 TokenType.TIMESTAMPLTZ, 277 TokenType.TIMESTAMPNTZ, 278 TokenType.DATETIME, 279 TokenType.DATETIME64, 280 TokenType.DATE, 281 TokenType.DATE32, 282 TokenType.INT4RANGE, 283 TokenType.INT4MULTIRANGE, 284 TokenType.INT8RANGE, 285 TokenType.INT8MULTIRANGE, 286 TokenType.NUMRANGE, 287 TokenType.NUMMULTIRANGE, 288 TokenType.TSRANGE, 289 TokenType.TSMULTIRANGE, 290 TokenType.TSTZRANGE, 291 TokenType.TSTZMULTIRANGE, 292 TokenType.DATERANGE, 293 TokenType.DATEMULTIRANGE, 294 TokenType.DECIMAL, 295 TokenType.UDECIMAL, 296 TokenType.BIGDECIMAL, 297 TokenType.UUID, 298 TokenType.GEOGRAPHY, 299 TokenType.GEOMETRY, 300 TokenType.HLLSKETCH, 301 TokenType.HSTORE, 302 TokenType.PSEUDO_TYPE, 303 TokenType.SUPER, 304 TokenType.SERIAL, 305 TokenType.SMALLSERIAL, 306 TokenType.BIGSERIAL, 307 TokenType.XML, 308 TokenType.YEAR, 309 TokenType.UNIQUEIDENTIFIER, 310 TokenType.USERDEFINED, 311 TokenType.MONEY, 312 TokenType.SMALLMONEY, 313 TokenType.ROWVERSION, 314 TokenType.IMAGE, 315 TokenType.VARIANT, 316 TokenType.VECTOR, 317 TokenType.OBJECT, 318 TokenType.OBJECT_IDENTIFIER, 319 TokenType.INET, 320 TokenType.IPADDRESS, 321 TokenType.IPPREFIX, 322 TokenType.IPV4, 323 TokenType.IPV6, 324 TokenType.UNKNOWN, 325 TokenType.NULL, 326 TokenType.NAME, 327 TokenType.TDIGEST, 328 *ENUM_TYPE_TOKENS, 329 *NESTED_TYPE_TOKENS, 330 *AGGREGATE_TYPE_TOKENS, 331 } 332 333 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 334 TokenType.BIGINT: TokenType.UBIGINT, 335 TokenType.INT: TokenType.UINT, 336 TokenType.MEDIUMINT: 
TokenType.UMEDIUMINT, 337 TokenType.SMALLINT: TokenType.USMALLINT, 338 TokenType.TINYINT: TokenType.UTINYINT, 339 TokenType.DECIMAL: TokenType.UDECIMAL, 340 } 341 342 SUBQUERY_PREDICATES = { 343 TokenType.ANY: exp.Any, 344 TokenType.ALL: exp.All, 345 TokenType.EXISTS: exp.Exists, 346 TokenType.SOME: exp.Any, 347 } 348 349 RESERVED_TOKENS = { 350 *Tokenizer.SINGLE_TOKENS.values(), 351 TokenType.SELECT, 352 } - {TokenType.IDENTIFIER} 353 354 DB_CREATABLES = { 355 TokenType.DATABASE, 356 TokenType.DICTIONARY, 357 TokenType.MODEL, 358 TokenType.SCHEMA, 359 TokenType.SEQUENCE, 360 TokenType.STORAGE_INTEGRATION, 361 TokenType.TABLE, 362 TokenType.TAG, 363 TokenType.VIEW, 364 TokenType.WAREHOUSE, 365 TokenType.STREAMLIT, 366 } 367 368 CREATABLES = { 369 TokenType.COLUMN, 370 TokenType.CONSTRAINT, 371 TokenType.FOREIGN_KEY, 372 TokenType.FUNCTION, 373 TokenType.INDEX, 374 TokenType.PROCEDURE, 375 *DB_CREATABLES, 376 } 377 378 # Tokens that can represent identifiers 379 ID_VAR_TOKENS = { 380 TokenType.ALL, 381 TokenType.VAR, 382 TokenType.ANTI, 383 TokenType.APPLY, 384 TokenType.ASC, 385 TokenType.ASOF, 386 TokenType.AUTO_INCREMENT, 387 TokenType.BEGIN, 388 TokenType.BPCHAR, 389 TokenType.CACHE, 390 TokenType.CASE, 391 TokenType.COLLATE, 392 TokenType.COMMAND, 393 TokenType.COMMENT, 394 TokenType.COMMIT, 395 TokenType.CONSTRAINT, 396 TokenType.COPY, 397 TokenType.DEFAULT, 398 TokenType.DELETE, 399 TokenType.DESC, 400 TokenType.DESCRIBE, 401 TokenType.DICTIONARY, 402 TokenType.DIV, 403 TokenType.END, 404 TokenType.EXECUTE, 405 TokenType.ESCAPE, 406 TokenType.FALSE, 407 TokenType.FIRST, 408 TokenType.FILTER, 409 TokenType.FINAL, 410 TokenType.FORMAT, 411 TokenType.FULL, 412 TokenType.IDENTIFIER, 413 TokenType.IS, 414 TokenType.ISNULL, 415 TokenType.INTERVAL, 416 TokenType.KEEP, 417 TokenType.KILL, 418 TokenType.LEFT, 419 TokenType.LOAD, 420 TokenType.MERGE, 421 TokenType.NATURAL, 422 TokenType.NEXT, 423 TokenType.OFFSET, 424 TokenType.OPERATOR, 425 TokenType.ORDINALITY, 426 
TokenType.OVERLAPS, 427 TokenType.OVERWRITE, 428 TokenType.PARTITION, 429 TokenType.PERCENT, 430 TokenType.PIVOT, 431 TokenType.PRAGMA, 432 TokenType.RANGE, 433 TokenType.RECURSIVE, 434 TokenType.REFERENCES, 435 TokenType.REFRESH, 436 TokenType.REPLACE, 437 TokenType.RIGHT, 438 TokenType.ROLLUP, 439 TokenType.ROW, 440 TokenType.ROWS, 441 TokenType.SEMI, 442 TokenType.SET, 443 TokenType.SETTINGS, 444 TokenType.SHOW, 445 TokenType.TEMPORARY, 446 TokenType.TOP, 447 TokenType.TRUE, 448 TokenType.TRUNCATE, 449 TokenType.UNIQUE, 450 TokenType.UNNEST, 451 TokenType.UNPIVOT, 452 TokenType.UPDATE, 453 TokenType.USE, 454 TokenType.VOLATILE, 455 TokenType.WINDOW, 456 *CREATABLES, 457 *SUBQUERY_PREDICATES, 458 *TYPE_TOKENS, 459 *NO_PAREN_FUNCTIONS, 460 } 461 462 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 463 464 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 465 TokenType.ANTI, 466 TokenType.APPLY, 467 TokenType.ASOF, 468 TokenType.FULL, 469 TokenType.LEFT, 470 TokenType.LOCK, 471 TokenType.NATURAL, 472 TokenType.OFFSET, 473 TokenType.RIGHT, 474 TokenType.SEMI, 475 TokenType.WINDOW, 476 } 477 478 ALIAS_TOKENS = ID_VAR_TOKENS 479 480 ARRAY_CONSTRUCTORS = { 481 "ARRAY": exp.Array, 482 "LIST": exp.List, 483 } 484 485 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 486 487 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 488 489 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 490 491 FUNC_TOKENS = { 492 TokenType.COLLATE, 493 TokenType.COMMAND, 494 TokenType.CURRENT_DATE, 495 TokenType.CURRENT_DATETIME, 496 TokenType.CURRENT_TIMESTAMP, 497 TokenType.CURRENT_TIME, 498 TokenType.CURRENT_USER, 499 TokenType.FILTER, 500 TokenType.FIRST, 501 TokenType.FORMAT, 502 TokenType.GLOB, 503 TokenType.IDENTIFIER, 504 TokenType.INDEX, 505 TokenType.ISNULL, 506 TokenType.ILIKE, 507 TokenType.INSERT, 508 TokenType.LIKE, 509 TokenType.MERGE, 510 TokenType.OFFSET, 511 TokenType.PRIMARY_KEY, 512 TokenType.RANGE, 513 TokenType.REPLACE, 514 TokenType.RLIKE, 515 TokenType.ROW, 516 
TokenType.UNNEST, 517 TokenType.VAR, 518 TokenType.LEFT, 519 TokenType.RIGHT, 520 TokenType.SEQUENCE, 521 TokenType.DATE, 522 TokenType.DATETIME, 523 TokenType.TABLE, 524 TokenType.TIMESTAMP, 525 TokenType.TIMESTAMPTZ, 526 TokenType.TRUNCATE, 527 TokenType.WINDOW, 528 TokenType.XOR, 529 *TYPE_TOKENS, 530 *SUBQUERY_PREDICATES, 531 } 532 533 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 534 TokenType.AND: exp.And, 535 } 536 537 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 538 TokenType.COLON_EQ: exp.PropertyEQ, 539 } 540 541 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 542 TokenType.OR: exp.Or, 543 } 544 545 EQUALITY = { 546 TokenType.EQ: exp.EQ, 547 TokenType.NEQ: exp.NEQ, 548 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 549 } 550 551 COMPARISON = { 552 TokenType.GT: exp.GT, 553 TokenType.GTE: exp.GTE, 554 TokenType.LT: exp.LT, 555 TokenType.LTE: exp.LTE, 556 } 557 558 BITWISE = { 559 TokenType.AMP: exp.BitwiseAnd, 560 TokenType.CARET: exp.BitwiseXor, 561 TokenType.PIPE: exp.BitwiseOr, 562 } 563 564 TERM = { 565 TokenType.DASH: exp.Sub, 566 TokenType.PLUS: exp.Add, 567 TokenType.MOD: exp.Mod, 568 TokenType.COLLATE: exp.Collate, 569 } 570 571 FACTOR = { 572 TokenType.DIV: exp.IntDiv, 573 TokenType.LR_ARROW: exp.Distance, 574 TokenType.SLASH: exp.Div, 575 TokenType.STAR: exp.Mul, 576 } 577 578 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 579 580 TIMES = { 581 TokenType.TIME, 582 TokenType.TIMETZ, 583 } 584 585 TIMESTAMPS = { 586 TokenType.TIMESTAMP, 587 TokenType.TIMESTAMPTZ, 588 TokenType.TIMESTAMPLTZ, 589 *TIMES, 590 } 591 592 SET_OPERATIONS = { 593 TokenType.UNION, 594 TokenType.INTERSECT, 595 TokenType.EXCEPT, 596 } 597 598 JOIN_METHODS = { 599 TokenType.ASOF, 600 TokenType.NATURAL, 601 TokenType.POSITIONAL, 602 } 603 604 JOIN_SIDES = { 605 TokenType.LEFT, 606 TokenType.RIGHT, 607 TokenType.FULL, 608 } 609 610 JOIN_KINDS = { 611 TokenType.ANTI, 612 TokenType.CROSS, 613 TokenType.INNER, 614 TokenType.OUTER, 615 
TokenType.SEMI, 616 TokenType.STRAIGHT_JOIN, 617 } 618 619 JOIN_HINTS: t.Set[str] = set() 620 621 LAMBDAS = { 622 TokenType.ARROW: lambda self, expressions: self.expression( 623 exp.Lambda, 624 this=self._replace_lambda( 625 self._parse_assignment(), 626 expressions, 627 ), 628 expressions=expressions, 629 ), 630 TokenType.FARROW: lambda self, expressions: self.expression( 631 exp.Kwarg, 632 this=exp.var(expressions[0].name), 633 expression=self._parse_assignment(), 634 ), 635 } 636 637 COLUMN_OPERATORS = { 638 TokenType.DOT: None, 639 TokenType.DCOLON: lambda self, this, to: self.expression( 640 exp.Cast if self.STRICT_CAST else exp.TryCast, 641 this=this, 642 to=to, 643 ), 644 TokenType.ARROW: lambda self, this, path: self.expression( 645 exp.JSONExtract, 646 this=this, 647 expression=self.dialect.to_json_path(path), 648 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 649 ), 650 TokenType.DARROW: lambda self, this, path: self.expression( 651 exp.JSONExtractScalar, 652 this=this, 653 expression=self.dialect.to_json_path(path), 654 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 655 ), 656 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 657 exp.JSONBExtract, 658 this=this, 659 expression=path, 660 ), 661 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 662 exp.JSONBExtractScalar, 663 this=this, 664 expression=path, 665 ), 666 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 667 exp.JSONBContains, 668 this=this, 669 expression=key, 670 ), 671 } 672 673 EXPRESSION_PARSERS = { 674 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 675 exp.Column: lambda self: self._parse_column(), 676 exp.Condition: lambda self: self._parse_assignment(), 677 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 678 exp.Expression: lambda self: self._parse_expression(), 679 exp.From: lambda self: self._parse_from(joins=True), 680 exp.Group: lambda self: self._parse_group(), 681 
exp.Having: lambda self: self._parse_having(), 682 exp.Identifier: lambda self: self._parse_id_var(), 683 exp.Join: lambda self: self._parse_join(), 684 exp.Lambda: lambda self: self._parse_lambda(), 685 exp.Lateral: lambda self: self._parse_lateral(), 686 exp.Limit: lambda self: self._parse_limit(), 687 exp.Offset: lambda self: self._parse_offset(), 688 exp.Order: lambda self: self._parse_order(), 689 exp.Ordered: lambda self: self._parse_ordered(), 690 exp.Properties: lambda self: self._parse_properties(), 691 exp.Qualify: lambda self: self._parse_qualify(), 692 exp.Returning: lambda self: self._parse_returning(), 693 exp.Select: lambda self: self._parse_select(), 694 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 695 exp.Table: lambda self: self._parse_table_parts(), 696 exp.TableAlias: lambda self: self._parse_table_alias(), 697 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 698 exp.Where: lambda self: self._parse_where(), 699 exp.Window: lambda self: self._parse_named_window(), 700 exp.With: lambda self: self._parse_with(), 701 "JOIN_TYPE": lambda self: self._parse_join_parts(), 702 } 703 704 STATEMENT_PARSERS = { 705 TokenType.ALTER: lambda self: self._parse_alter(), 706 TokenType.BEGIN: lambda self: self._parse_transaction(), 707 TokenType.CACHE: lambda self: self._parse_cache(), 708 TokenType.COMMENT: lambda self: self._parse_comment(), 709 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 710 TokenType.COPY: lambda self: self._parse_copy(), 711 TokenType.CREATE: lambda self: self._parse_create(), 712 TokenType.DELETE: lambda self: self._parse_delete(), 713 TokenType.DESC: lambda self: self._parse_describe(), 714 TokenType.DESCRIBE: lambda self: self._parse_describe(), 715 TokenType.DROP: lambda self: self._parse_drop(), 716 TokenType.INSERT: lambda self: self._parse_insert(), 717 TokenType.KILL: lambda self: self._parse_kill(), 718 TokenType.LOAD: lambda self: self._parse_load(), 719 TokenType.MERGE: 
lambda self: self._parse_merge(), 720 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 721 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 722 TokenType.REFRESH: lambda self: self._parse_refresh(), 723 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 724 TokenType.SET: lambda self: self._parse_set(), 725 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 726 TokenType.UNCACHE: lambda self: self._parse_uncache(), 727 TokenType.UPDATE: lambda self: self._parse_update(), 728 TokenType.USE: lambda self: self.expression( 729 exp.Use, 730 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 731 this=self._parse_table(schema=False), 732 ), 733 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 734 } 735 736 UNARY_PARSERS = { 737 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 738 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 739 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 740 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 741 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 742 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 743 } 744 745 STRING_PARSERS = { 746 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 747 exp.RawString, this=token.text 748 ), 749 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 750 exp.National, this=token.text 751 ), 752 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 753 TokenType.STRING: lambda self, token: self.expression( 754 exp.Literal, this=token.text, is_string=True 755 ), 756 TokenType.UNICODE_STRING: lambda self, token: self.expression( 757 exp.UnicodeString, 758 this=token.text, 759 escape=self._match_text_seq("UESCAPE") 
and self._parse_string(), 760 ), 761 } 762 763 NUMERIC_PARSERS = { 764 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 765 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 766 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 767 TokenType.NUMBER: lambda self, token: self.expression( 768 exp.Literal, this=token.text, is_string=False 769 ), 770 } 771 772 PRIMARY_PARSERS = { 773 **STRING_PARSERS, 774 **NUMERIC_PARSERS, 775 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 776 TokenType.NULL: lambda self, _: self.expression(exp.Null), 777 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 778 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 779 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 780 TokenType.STAR: lambda self, _: self.expression( 781 exp.Star, 782 **{ 783 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 784 "replace": self._parse_star_op("REPLACE"), 785 "rename": self._parse_star_op("RENAME"), 786 }, 787 ), 788 } 789 790 PLACEHOLDER_PARSERS = { 791 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 792 TokenType.PARAMETER: lambda self: self._parse_parameter(), 793 TokenType.COLON: lambda self: ( 794 self.expression(exp.Placeholder, this=self._prev.text) 795 if self._match_set(self.ID_VAR_TOKENS) 796 else None 797 ), 798 } 799 800 RANGE_PARSERS = { 801 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 802 TokenType.GLOB: binary_range_parser(exp.Glob), 803 TokenType.ILIKE: binary_range_parser(exp.ILike), 804 TokenType.IN: lambda self, this: self._parse_in(this), 805 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 806 TokenType.IS: lambda self, this: self._parse_is(this), 807 TokenType.LIKE: binary_range_parser(exp.Like), 808 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 809 
TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 810 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 811 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 812 } 813 814 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 815 "ALLOWED_VALUES": lambda self: self.expression( 816 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 817 ), 818 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 819 "AUTO": lambda self: self._parse_auto_property(), 820 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 821 "BACKUP": lambda self: self.expression( 822 exp.BackupProperty, this=self._parse_var(any_token=True) 823 ), 824 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 825 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 826 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 827 "CHECKSUM": lambda self: self._parse_checksum(), 828 "CLUSTER BY": lambda self: self._parse_cluster(), 829 "CLUSTERED": lambda self: self._parse_clustered_by(), 830 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 831 exp.CollateProperty, **kwargs 832 ), 833 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 834 "CONTAINS": lambda self: self._parse_contains_property(), 835 "COPY": lambda self: self._parse_copy_property(), 836 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 837 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 838 "DEFINER": lambda self: self._parse_definer(), 839 "DETERMINISTIC": lambda self: self.expression( 840 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 841 ), 842 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 843 "DISTKEY": lambda self: self._parse_distkey(), 844 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 845 "ENGINE": lambda self: 
self._parse_property_assignment(exp.EngineProperty), 846 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 847 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 848 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 849 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 850 "FREESPACE": lambda self: self._parse_freespace(), 851 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 852 "HEAP": lambda self: self.expression(exp.HeapProperty), 853 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 854 "IMMUTABLE": lambda self: self.expression( 855 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 856 ), 857 "INHERITS": lambda self: self.expression( 858 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 859 ), 860 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 861 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 862 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 863 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 864 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 865 "LIKE": lambda self: self._parse_create_like(), 866 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 867 "LOCK": lambda self: self._parse_locking(), 868 "LOCKING": lambda self: self._parse_locking(), 869 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 870 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 871 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 872 "MODIFIES": lambda self: self._parse_modifies_property(), 873 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 874 "NO": lambda self: self._parse_no_property(), 875 "ON": lambda self: self._parse_on_property(), 876 "ORDER BY": lambda self: 
self._parse_order(skip_order_token=True), 877 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 878 "PARTITION": lambda self: self._parse_partitioned_of(), 879 "PARTITION BY": lambda self: self._parse_partitioned_by(), 880 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 881 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 882 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 883 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 884 "READS": lambda self: self._parse_reads_property(), 885 "REMOTE": lambda self: self._parse_remote_with_connection(), 886 "RETURNS": lambda self: self._parse_returns(), 887 "STRICT": lambda self: self.expression(exp.StrictProperty), 888 "ROW": lambda self: self._parse_row(), 889 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 890 "SAMPLE": lambda self: self.expression( 891 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 892 ), 893 "SECURE": lambda self: self.expression(exp.SecureProperty), 894 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 895 "SETTINGS": lambda self: self.expression( 896 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 897 ), 898 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 899 "SORTKEY": lambda self: self._parse_sortkey(), 900 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 901 "STABLE": lambda self: self.expression( 902 exp.StabilityProperty, this=exp.Literal.string("STABLE") 903 ), 904 "STORED": lambda self: self._parse_stored(), 905 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 906 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 907 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 908 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 909 "TO": lambda self: self._parse_to_table(), 910 "TRANSIENT": 
        lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Maps a column-constraint keyword to the callback that parses the tokens
    # following it into the corresponding constraint expression.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> becomes OnUpdateColumnConstraint; a bare ON <id>
        # falls through to OnProperty.
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # Maps the keyword after ALTER TABLE <name> to its action parser.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    # Maps the keyword after ALTER TABLE ... ALTER to its parser (Redshift dist/sort keys).
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraint kinds that may appear at schema level without a preceding CONSTRAINT <name>.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    # Functions parsed without a parenthesized argument list (e.g. CASE ... END).
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that can never be the name of a function call.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Functions whose argument lists need bespoke parsing (non-standard syntax).
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Maps the token starting a query-modifier clause to ("modifier arg name", parser result).
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        # FETCH is normalized into the same "limit" arg as LIMIT.
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # Maps the keyword after SET to the parser for that kind of SET item.
    SET_PARSERS = {
        "GLOBAL":
lambda self: self._parse_set_item_assignment("GLOBAL"), 1085 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1086 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1087 "TRANSACTION": lambda self: self._parse_set_transaction(), 1088 } 1089 1090 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1091 1092 TYPE_LITERAL_PARSERS = { 1093 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1094 } 1095 1096 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1097 1098 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1099 1100 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1101 1102 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1103 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1104 "ISOLATION": ( 1105 ("LEVEL", "REPEATABLE", "READ"), 1106 ("LEVEL", "READ", "COMMITTED"), 1107 ("LEVEL", "READ", "UNCOMITTED"), 1108 ("LEVEL", "SERIALIZABLE"), 1109 ), 1110 "READ": ("WRITE", "ONLY"), 1111 } 1112 1113 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1114 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1115 ) 1116 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1117 1118 CREATE_SEQUENCE: OPTIONS_TYPE = { 1119 "SCALE": ("EXTEND", "NOEXTEND"), 1120 "SHARD": ("EXTEND", "NOEXTEND"), 1121 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1122 **dict.fromkeys( 1123 ( 1124 "SESSION", 1125 "GLOBAL", 1126 "KEEP", 1127 "NOKEEP", 1128 "ORDER", 1129 "NOORDER", 1130 "NOCACHE", 1131 "CYCLE", 1132 "NOCYCLE", 1133 "NOMINVALUE", 1134 "NOMAXVALUE", 1135 "NOSCALE", 1136 "NOSHARD", 1137 ), 1138 tuple(), 1139 ), 1140 } 1141 1142 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1143 1144 USABLES: OPTIONS_TYPE = dict.fromkeys( 1145 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1146 ) 1147 1148 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1149 
    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    # Trailing options accepted on key constraints (e.g. MATCH FULL, INITIALLY DEFERRED).
    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    # INSERT OR <alternative> keywords.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    # Tokens allowed as a window alias (ROWS would be ambiguous with the frame spec).
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    # Tokens allowed as identifiers inside FETCH (ROW/ROWS/PERCENT are clause keywords there).
    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    # Passed as the strictness flag when parsing CAST/CONVERT (see FUNCTION_PARSERS).
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How errors are handled (default: raise immediately).
            error_message_context: The number of characters of SQL context shown in error messages.
            max_errors: Maximum number of error messages concatenated when raising.
            dialect: The dialect (name or instance) to parse with.
        """
        # Imported here to avoid a circular dependency with sqlglot.dialects
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all per-parse state (tokens, cursor, accumulated errors)."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            ParseError: If none of the given types could be parsed; the errors of every
                attempt are merged into the raised error.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
tokens in chunks: 1381 self._index = -1 1382 self._tokens = tokens 1383 self._advance() 1384 1385 expressions.append(parse_method(self)) 1386 1387 if self._index < len(self._tokens): 1388 self.raise_error("Invalid expression / Unexpected token") 1389 1390 self.check_errors() 1391 1392 return expressions 1393 1394 def check_errors(self) -> None: 1395 """Logs or raises any found errors, depending on the chosen error level setting.""" 1396 if self.error_level == ErrorLevel.WARN: 1397 for error in self.errors: 1398 logger.error(str(error)) 1399 elif self.error_level == ErrorLevel.RAISE and self.errors: 1400 raise ParseError( 1401 concat_messages(self.errors, self.max_errors), 1402 errors=merge_errors(self.errors), 1403 ) 1404 1405 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1406 """ 1407 Appends an error in the list of recorded errors or raises it, depending on the chosen 1408 error level setting. 1409 """ 1410 token = token or self._curr or self._prev or Token.string("") 1411 start = token.start 1412 end = token.end + 1 1413 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1414 highlight = self.sql[start:end] 1415 end_context = self.sql[end : end + self.error_message_context] 1416 1417 error = ParseError.new( 1418 f"{message}. Line {token.line}, Col: {token.col}.\n" 1419 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1420 description=message, 1421 line=token.line, 1422 col=token.col, 1423 start_context=start_context, 1424 highlight=highlight, 1425 end_context=end_context, 1426 ) 1427 1428 if self.error_level == ErrorLevel.IMMEDIATE: 1429 raise error 1430 1431 self.errors.append(error) 1432 1433 def expression( 1434 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1435 ) -> E: 1436 """ 1437 Creates a new, validated Expression. 1438 1439 Args: 1440 exp_class: The expression class to instantiate. 1441 comments: An optional list of comments to attach to the expression. 
1442 kwargs: The arguments to set for the expression along with their respective values. 1443 1444 Returns: 1445 The target expression. 1446 """ 1447 instance = exp_class(**kwargs) 1448 instance.add_comments(comments) if comments else self._add_comments(instance) 1449 return self.validate_expression(instance) 1450 1451 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1452 if expression and self._prev_comments: 1453 expression.add_comments(self._prev_comments) 1454 self._prev_comments = None 1455 1456 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1457 """ 1458 Validates an Expression, making sure that all its mandatory arguments are set. 1459 1460 Args: 1461 expression: The expression to validate. 1462 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1463 1464 Returns: 1465 The validated expression. 1466 """ 1467 if self.error_level != ErrorLevel.IGNORE: 1468 for error_message in expression.error_messages(args): 1469 self.raise_error(error_message) 1470 1471 return expression 1472 1473 def _find_sql(self, start: Token, end: Token) -> str: 1474 return self.sql[start.start : end.end + 1] 1475 1476 def _is_connected(self) -> bool: 1477 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1478 1479 def _advance(self, times: int = 1) -> None: 1480 self._index += times 1481 self._curr = seq_get(self._tokens, self._index) 1482 self._next = seq_get(self._tokens, self._index + 1) 1483 1484 if self._index > 0: 1485 self._prev = self._tokens[self._index - 1] 1486 self._prev_comments = self._prev.comments 1487 else: 1488 self._prev = None 1489 self._prev_comments = None 1490 1491 def _retreat(self, index: int) -> None: 1492 if index != self._index: 1493 self._advance(index - self._index) 1494 1495 def _warn_unsupported(self) -> None: 1496 if len(self._tokens) <= 1: 1497 return 1498 1499 # We use _find_sql because self.sql may comprise multiple chunks, 
    def _parse_command(self) -> exp.Command:
        """Wraps the previous keyword and the rest of the statement into a raw exp.Command."""
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so failures surface as ParseError here instead of accumulating.
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            # Restore the cursor on failure (or always, when retreat=True) and the error level.
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT ON <kind> <object> IS <string> into exp.Comment."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown object kind: fall back to a raw command.
            return self._parse_as_command(start)

        # The object reference is parsed according to the commented-on kind.
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses the table reference following TO into exp.ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause into exp.MergeTreeTTL."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL item is an expression optionally followed by an action
            # (DELETE / RECOMPRESS / TO DISK / TO VOLUME).
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses one statement: dispatches on the leading token, else tries an
        expression / SELECT followed by query modifiers."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        # Dialect-specific command keywords fall back to raw Command parsing.
        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> ... into exp.Drop."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown object kind: fall back to a raw command.
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        # ClickHouse-style DROP ... ON CLUSTER
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS; truthy only when the whole sequence is present."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement (function/procedure, index, or DB object) into exp.Create,
        falling back to a raw Command when the shape isn't recognized."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        # T-SQL [NON]CLUSTERED COLUMNSTORE index flavors
        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties can appear at several locations; accumulate them all.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        # Snowflake-style CREATE ... CLONE/COPY
        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            # Leftover tokens we don't understand: treat the whole statement as a command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )
    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parses CREATE SEQUENCE options (INCREMENT BY, MINVALUE, CACHE, ...) into
        exp.SequenceProperties; returns None when no option tokens were consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                # Keyword-only options (CYCLE, NOCACHE, ...) collected verbatim.
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Modifier keywords that may precede the property name; passed through
        # to the property parser when truthy.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The matched parser doesn't accept these modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parses a parenthesized, comma-separated property list."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single table/view property, trying keyword parsers first and
        falling back to `key = value` (and finally sequence properties)."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED [AS] [INPUTFORMAT ... OUTPUTFORMAT ...] | <format>."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        """Parses a field, normalizing unquoted identifiers into exp.Var."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parses `[= | AS] <value>` into the given property class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collects consecutive properties into exp.Properties; None if there are none.

        Args:
            before: Use the pre-name (Teradata-style) property grammar.
        """
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses Teradata's FALLBACK [PROTECTION] property."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguates VOLATILE: a table property when preceded by CREATE/REPLACE/UNIQUE,
        otherwise a function stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        # NOTE(review): if no unit token follows, `unit` is None and the Var text
        # becomes e.g. "5 None" — confirm whether a missing unit can occur here.
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")
self._advance_any() and self._prev.text.upper()) 2023 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2024 prop.set("retention_period", self._parse_retention_period()) 2025 2026 self._match(TokenType.COMMA) 2027 2028 return prop 2029 2030 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2031 self._match(TokenType.EQ) 2032 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2033 prop = self.expression(exp.DataDeletionProperty, on=on) 2034 2035 if self._match(TokenType.L_PAREN): 2036 while self._curr and not self._match(TokenType.R_PAREN): 2037 if self._match_text_seq("FILTER_COLUMN", "="): 2038 prop.set("filter_column", self._parse_column()) 2039 elif self._match_text_seq("RETENTION_PERIOD", "="): 2040 prop.set("retention_period", self._parse_retention_period()) 2041 2042 self._match(TokenType.COMMA) 2043 2044 return prop 2045 2046 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2047 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2048 prop = self._parse_system_versioning_property(with_=True) 2049 self._match_r_paren() 2050 return prop 2051 2052 if self._match(TokenType.L_PAREN, advance=False): 2053 return self._parse_wrapped_properties() 2054 2055 if self._match_text_seq("JOURNAL"): 2056 return self._parse_withjournaltable() 2057 2058 if self._match_texts(self.VIEW_ATTRIBUTES): 2059 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2060 2061 if self._match_text_seq("DATA"): 2062 return self._parse_withdata(no=False) 2063 elif self._match_text_seq("NO", "DATA"): 2064 return self._parse_withdata(no=True) 2065 2066 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2067 return self._parse_serde_properties(with_=True) 2068 2069 if self._match(TokenType.SCHEMA): 2070 return self.expression( 2071 exp.WithSchemaBindingProperty, 2072 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2073 ) 2074 2075 if not self._next: 2076 return None 
2077 2078 return self._parse_withisolatedloading() 2079 2080 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2081 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2082 self._match(TokenType.EQ) 2083 2084 user = self._parse_id_var() 2085 self._match(TokenType.PARAMETER) 2086 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2087 2088 if not user or not host: 2089 return None 2090 2091 return exp.DefinerProperty(this=f"{user}@{host}") 2092 2093 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2094 self._match(TokenType.TABLE) 2095 self._match(TokenType.EQ) 2096 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2097 2098 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2099 return self.expression(exp.LogProperty, no=no) 2100 2101 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2102 return self.expression(exp.JournalProperty, **kwargs) 2103 2104 def _parse_checksum(self) -> exp.ChecksumProperty: 2105 self._match(TokenType.EQ) 2106 2107 on = None 2108 if self._match(TokenType.ON): 2109 on = True 2110 elif self._match_text_seq("OFF"): 2111 on = False 2112 2113 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2114 2115 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2116 return self.expression( 2117 exp.Cluster, 2118 expressions=( 2119 self._parse_wrapped_csv(self._parse_ordered) 2120 if wrapped 2121 else self._parse_csv(self._parse_ordered) 2122 ), 2123 ) 2124 2125 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2126 self._match_text_seq("BY") 2127 2128 self._match_l_paren() 2129 expressions = self._parse_csv(self._parse_column) 2130 self._match_r_paren() 2131 2132 if self._match_text_seq("SORTED", "BY"): 2133 self._match_l_paren() 2134 sorted_by = self._parse_csv(self._parse_ordered) 2135 self._match_r_paren() 2136 else: 2137 sorted_by = None 2138 2139 
self._match(TokenType.INTO) 2140 buckets = self._parse_number() 2141 self._match_text_seq("BUCKETS") 2142 2143 return self.expression( 2144 exp.ClusteredByProperty, 2145 expressions=expressions, 2146 sorted_by=sorted_by, 2147 buckets=buckets, 2148 ) 2149 2150 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2151 if not self._match_text_seq("GRANTS"): 2152 self._retreat(self._index - 1) 2153 return None 2154 2155 return self.expression(exp.CopyGrantsProperty) 2156 2157 def _parse_freespace(self) -> exp.FreespaceProperty: 2158 self._match(TokenType.EQ) 2159 return self.expression( 2160 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2161 ) 2162 2163 def _parse_mergeblockratio( 2164 self, no: bool = False, default: bool = False 2165 ) -> exp.MergeBlockRatioProperty: 2166 if self._match(TokenType.EQ): 2167 return self.expression( 2168 exp.MergeBlockRatioProperty, 2169 this=self._parse_number(), 2170 percent=self._match(TokenType.PERCENT), 2171 ) 2172 2173 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2174 2175 def _parse_datablocksize( 2176 self, 2177 default: t.Optional[bool] = None, 2178 minimum: t.Optional[bool] = None, 2179 maximum: t.Optional[bool] = None, 2180 ) -> exp.DataBlocksizeProperty: 2181 self._match(TokenType.EQ) 2182 size = self._parse_number() 2183 2184 units = None 2185 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2186 units = self._prev.text 2187 2188 return self.expression( 2189 exp.DataBlocksizeProperty, 2190 size=size, 2191 units=units, 2192 default=default, 2193 minimum=minimum, 2194 maximum=maximum, 2195 ) 2196 2197 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2198 self._match(TokenType.EQ) 2199 always = self._match_text_seq("ALWAYS") 2200 manual = self._match_text_seq("MANUAL") 2201 never = self._match_text_seq("NEVER") 2202 default = self._match_text_seq("DEFAULT") 2203 2204 autotemp = None 2205 if 
self._match_text_seq("AUTOTEMP"): 2206 autotemp = self._parse_schema() 2207 2208 return self.expression( 2209 exp.BlockCompressionProperty, 2210 always=always, 2211 manual=manual, 2212 never=never, 2213 default=default, 2214 autotemp=autotemp, 2215 ) 2216 2217 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2218 index = self._index 2219 no = self._match_text_seq("NO") 2220 concurrent = self._match_text_seq("CONCURRENT") 2221 2222 if not self._match_text_seq("ISOLATED", "LOADING"): 2223 self._retreat(index) 2224 return None 2225 2226 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2227 return self.expression( 2228 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2229 ) 2230 2231 def _parse_locking(self) -> exp.LockingProperty: 2232 if self._match(TokenType.TABLE): 2233 kind = "TABLE" 2234 elif self._match(TokenType.VIEW): 2235 kind = "VIEW" 2236 elif self._match(TokenType.ROW): 2237 kind = "ROW" 2238 elif self._match_text_seq("DATABASE"): 2239 kind = "DATABASE" 2240 else: 2241 kind = None 2242 2243 if kind in ("DATABASE", "TABLE", "VIEW"): 2244 this = self._parse_table_parts() 2245 else: 2246 this = None 2247 2248 if self._match(TokenType.FOR): 2249 for_or_in = "FOR" 2250 elif self._match(TokenType.IN): 2251 for_or_in = "IN" 2252 else: 2253 for_or_in = None 2254 2255 if self._match_text_seq("ACCESS"): 2256 lock_type = "ACCESS" 2257 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2258 lock_type = "EXCLUSIVE" 2259 elif self._match_text_seq("SHARE"): 2260 lock_type = "SHARE" 2261 elif self._match_text_seq("READ"): 2262 lock_type = "READ" 2263 elif self._match_text_seq("WRITE"): 2264 lock_type = "WRITE" 2265 elif self._match_text_seq("CHECKSUM"): 2266 lock_type = "CHECKSUM" 2267 else: 2268 lock_type = None 2269 2270 override = self._match_text_seq("OVERRIDE") 2271 2272 return self.expression( 2273 exp.LockingProperty, 2274 this=this, 2275 kind=kind, 2276 
    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound spec (Postgres CREATE TABLE ... PARTITION OF).

        Accepts one of three forms: IN (...), FROM (...) TO (...), or
        WITH (MODULUS <n>, REMAINDER <n>). Raises a parse error otherwise.
        """

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are special range-bound keywords here,
            # not ordinary expressions
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            # FOR VALUES IN (expr, ...) -- list partitioning
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            # FOR VALUES FROM (...) TO (...) -- range partitioning
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            # FOR VALUES WITH (MODULUS m, REMAINDER r) -- hash partitioning;
            # the modulus goes in `this` and the remainder in `expression`
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )
self._parse_partition_bound_spec() 2333 else: 2334 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2335 2336 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2337 2338 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2339 self._match(TokenType.EQ) 2340 return self.expression( 2341 exp.PartitionedByProperty, 2342 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2343 ) 2344 2345 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2346 if self._match_text_seq("AND", "STATISTICS"): 2347 statistics = True 2348 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2349 statistics = False 2350 else: 2351 statistics = None 2352 2353 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2354 2355 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2356 if self._match_text_seq("SQL"): 2357 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2358 return None 2359 2360 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2361 if self._match_text_seq("SQL", "DATA"): 2362 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2363 return None 2364 2365 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2366 if self._match_text_seq("PRIMARY", "INDEX"): 2367 return exp.NoPrimaryIndexProperty() 2368 if self._match_text_seq("SQL"): 2369 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2370 return None 2371 2372 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2373 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2374 return exp.OnCommitProperty() 2375 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2376 return exp.OnCommitProperty(delete=True) 2377 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2378 2379 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2380 if 
self._match_text_seq("SQL", "DATA"): 2381 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2382 return None 2383 2384 def _parse_distkey(self) -> exp.DistKeyProperty: 2385 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2386 2387 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2388 table = self._parse_table(schema=True) 2389 2390 options = [] 2391 while self._match_texts(("INCLUDING", "EXCLUDING")): 2392 this = self._prev.text.upper() 2393 2394 id_var = self._parse_id_var() 2395 if not id_var: 2396 return None 2397 2398 options.append( 2399 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2400 ) 2401 2402 return self.expression(exp.LikeProperty, this=table, expressions=options) 2403 2404 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2405 return self.expression( 2406 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2407 ) 2408 2409 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2410 self._match(TokenType.EQ) 2411 return self.expression( 2412 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2413 ) 2414 2415 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2416 self._match_text_seq("WITH", "CONNECTION") 2417 return self.expression( 2418 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2419 ) 2420 2421 def _parse_returns(self) -> exp.ReturnsProperty: 2422 value: t.Optional[exp.Expression] 2423 null = None 2424 is_table = self._match(TokenType.TABLE) 2425 2426 if is_table: 2427 if self._match(TokenType.LT): 2428 value = self.expression( 2429 exp.Schema, 2430 this="TABLE", 2431 expressions=self._parse_csv(self._parse_struct_types), 2432 ) 2433 if not self._match(TokenType.GT): 2434 self.raise_error("Expecting >") 2435 else: 2436 value = self._parse_schema(exp.var("TABLE")) 2437 elif 
    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement, e.g. DESCRIBE [EXTENDED|FORMATTED|HISTORY] <table>."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The "style" word was actually the first part of a dotted table
            # name (e.g. DESCRIBE extended.t) -- rewind both tokens and reparse
            # it as part of the table reference below.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse an ON CONFLICT or ON DUPLICATE KEY clause of an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a key-column list
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        # _prev is the last token of the matched action; UPDATE means an
        # assignment list follows (DO UPDATE SET a = ..., b = ...)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )
into=self._match(TokenType.INTO) and self._parse_table_part(), 2558 ) 2559 2560 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2561 if not self._match(TokenType.FORMAT): 2562 return None 2563 return self._parse_row_format() 2564 2565 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2566 index = self._index 2567 with_ = with_ or self._match_text_seq("WITH") 2568 2569 if not self._match(TokenType.SERDE_PROPERTIES): 2570 self._retreat(index) 2571 return None 2572 return self.expression( 2573 exp.SerdeProperties, 2574 **{ # type: ignore 2575 "expressions": self._parse_wrapped_properties(), 2576 "with": with_, 2577 }, 2578 ) 2579 2580 def _parse_row_format( 2581 self, match_row: bool = False 2582 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2583 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2584 return None 2585 2586 if self._match_text_seq("SERDE"): 2587 this = self._parse_string() 2588 2589 serde_properties = self._parse_serde_properties() 2590 2591 return self.expression( 2592 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2593 ) 2594 2595 self._match_text_seq("DELIMITED") 2596 2597 kwargs = {} 2598 2599 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2600 kwargs["fields"] = self._parse_string() 2601 if self._match_text_seq("ESCAPED", "BY"): 2602 kwargs["escaped"] = self._parse_string() 2603 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2604 kwargs["collection_items"] = self._parse_string() 2605 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2606 kwargs["map_keys"] = self._parse_string() 2607 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2608 kwargs["lines"] = self._parse_string() 2609 if self._match_text_seq("NULL", "DEFINED", "AS"): 2610 kwargs["null"] = self._parse_string() 2611 2612 return self.expression(exp.RowFormatDelimitedProperty, 
**kwargs) # type: ignore 2613 2614 def _parse_load(self) -> exp.LoadData | exp.Command: 2615 if self._match_text_seq("DATA"): 2616 local = self._match_text_seq("LOCAL") 2617 self._match_text_seq("INPATH") 2618 inpath = self._parse_string() 2619 overwrite = self._match(TokenType.OVERWRITE) 2620 self._match_pair(TokenType.INTO, TokenType.TABLE) 2621 2622 return self.expression( 2623 exp.LoadData, 2624 this=self._parse_table(schema=True), 2625 local=local, 2626 overwrite=overwrite, 2627 inpath=inpath, 2628 partition=self._parse_partition(), 2629 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2630 serde=self._match_text_seq("SERDE") and self._parse_string(), 2631 ) 2632 return self._parse_as_command(self._prev) 2633 2634 def _parse_delete(self) -> exp.Delete: 2635 # This handles MySQL's "Multiple-Table Syntax" 2636 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2637 tables = None 2638 comments = self._prev_comments 2639 if not self._match(TokenType.FROM, advance=False): 2640 tables = self._parse_csv(self._parse_table) or None 2641 2642 returning = self._parse_returning() 2643 2644 return self.expression( 2645 exp.Delete, 2646 comments=comments, 2647 tables=tables, 2648 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2649 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2650 where=self._parse_where(), 2651 returning=returning or self._parse_returning(), 2652 limit=self._parse_limit(), 2653 ) 2654 2655 def _parse_update(self) -> exp.Update: 2656 comments = self._prev_comments 2657 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2658 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2659 returning = self._parse_returning() 2660 return self.expression( 2661 exp.Update, 2662 comments=comments, 2663 **{ # type: ignore 2664 "this": this, 2665 "expressions": expressions, 2666 "from": self._parse_from(joins=True), 2667 "where": 
self._parse_where(), 2668 "returning": returning or self._parse_returning(), 2669 "order": self._parse_order(), 2670 "limit": self._parse_limit(), 2671 }, 2672 ) 2673 2674 def _parse_uncache(self) -> exp.Uncache: 2675 if not self._match(TokenType.TABLE): 2676 self.raise_error("Expecting TABLE after UNCACHE") 2677 2678 return self.expression( 2679 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2680 ) 2681 2682 def _parse_cache(self) -> exp.Cache: 2683 lazy = self._match_text_seq("LAZY") 2684 self._match(TokenType.TABLE) 2685 table = self._parse_table(schema=True) 2686 2687 options = [] 2688 if self._match_text_seq("OPTIONS"): 2689 self._match_l_paren() 2690 k = self._parse_string() 2691 self._match(TokenType.EQ) 2692 v = self._parse_string() 2693 options = [k, v] 2694 self._match_r_paren() 2695 2696 self._match(TokenType.ALIAS) 2697 return self.expression( 2698 exp.Cache, 2699 this=table, 2700 lazy=lazy, 2701 options=options, 2702 expression=self._parse_select(nested=True), 2703 ) 2704 2705 def _parse_partition(self) -> t.Optional[exp.Partition]: 2706 if not self._match(TokenType.PARTITION): 2707 return None 2708 2709 return self.expression( 2710 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2711 ) 2712 2713 def _parse_value(self) -> t.Optional[exp.Tuple]: 2714 if self._match(TokenType.L_PAREN): 2715 expressions = self._parse_csv(self._parse_expression) 2716 self._match_r_paren() 2717 return self.expression(exp.Tuple, expressions=expressions) 2718 2719 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the comma-separated projection list of a SELECT statement.

        Thin hook over _parse_expressions so dialects can override projection
        parsing independently of general expression-list parsing.
        """
        return self._parse_expressions()
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH (CTE) clause into an exp.With node.

        Args:
            skip_with_token: when True, assume the WITH keyword was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs may be separated by a comma, a repeated WITH, or both
            # (", WITH"); the short-circuit below consumes whichever separators
            # are present and stops when neither follows.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
self._parse_table_alias(self.ID_VAR_TOKENS) 2858 if not alias or not alias.this: 2859 self.raise_error("Expected CTE to have alias") 2860 2861 self._match(TokenType.ALIAS) 2862 comments = self._prev_comments 2863 2864 if self._match_text_seq("NOT", "MATERIALIZED"): 2865 materialized = False 2866 elif self._match_text_seq("MATERIALIZED"): 2867 materialized = True 2868 else: 2869 materialized = None 2870 2871 return self.expression( 2872 exp.CTE, 2873 this=self._parse_wrapped(self._parse_statement), 2874 alias=alias, 2875 materialized=materialized, 2876 comments=comments, 2877 ) 2878 2879 def _parse_table_alias( 2880 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2881 ) -> t.Optional[exp.TableAlias]: 2882 any_token = self._match(TokenType.ALIAS) 2883 alias = ( 2884 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2885 or self._parse_string_as_identifier() 2886 ) 2887 2888 index = self._index 2889 if self._match(TokenType.L_PAREN): 2890 columns = self._parse_csv(self._parse_function_parameter) 2891 self._match_r_paren() if columns else self._retreat(index) 2892 else: 2893 columns = None 2894 2895 if not alias and not columns: 2896 return None 2897 2898 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2899 2900 # We bubble up comments from the Identifier to the TableAlias 2901 if isinstance(alias, exp.Identifier): 2902 table_alias.add_comments(alias.pop_comments()) 2903 2904 return table_alias 2905 2906 def _parse_subquery( 2907 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2908 ) -> t.Optional[exp.Subquery]: 2909 if not this: 2910 return None 2911 2912 return self.expression( 2913 exp.Subquery, 2914 this=this, 2915 pivots=self._parse_pivots(), 2916 alias=self._parse_table_alias() if parse_alias else None, 2917 ) 2918 2919 def _implicit_unnests_to_explicit(self, this: E) -> E: 2920 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2921 
2922 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2923 for i, join in enumerate(this.args.get("joins") or []): 2924 table = join.this 2925 normalized_table = table.copy() 2926 normalized_table.meta["maybe_column"] = True 2927 normalized_table = _norm(normalized_table, dialect=self.dialect) 2928 2929 if isinstance(table, exp.Table) and not join.args.get("on"): 2930 if normalized_table.parts[0].name in refs: 2931 table_as_column = table.to_column() 2932 unnest = exp.Unnest(expressions=[table_as_column]) 2933 2934 # Table.to_column creates a parent Alias node that we want to convert to 2935 # a TableAlias and attach to the Unnest, so it matches the parser's output 2936 if isinstance(table.args.get("alias"), exp.TableAlias): 2937 table_as_column.replace(table_as_column.this) 2938 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2939 2940 table.replace(unnest) 2941 2942 refs.add(normalized_table.alias_or_name) 2943 2944 return this 2945 2946 def _parse_query_modifiers( 2947 self, this: t.Optional[exp.Expression] 2948 ) -> t.Optional[exp.Expression]: 2949 if isinstance(this, (exp.Query, exp.Table)): 2950 for join in self._parse_joins(): 2951 this.append("joins", join) 2952 for lateral in iter(self._parse_lateral, None): 2953 this.append("laterals", lateral) 2954 2955 while True: 2956 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2957 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2958 key, expression = parser(self) 2959 2960 if expression: 2961 this.set(key, expression) 2962 if key == "limit": 2963 offset = expression.args.pop("offset", None) 2964 2965 if offset: 2966 offset = exp.Offset(expression=offset) 2967 this.set("offset", offset) 2968 2969 limit_by_expressions = expression.expressions 2970 expression.set("expressions", None) 2971 offset.set("expressions", limit_by_expressions) 2972 continue 2973 break 2974 2975 if self.SUPPORTS_IMPLICIT_UNNEST and this and 
this.args.get("from"): 2976 this = self._implicit_unnests_to_explicit(this) 2977 2978 return this 2979 2980 def _parse_hint(self) -> t.Optional[exp.Hint]: 2981 if self._match(TokenType.HINT): 2982 hints = [] 2983 for hint in iter( 2984 lambda: self._parse_csv( 2985 lambda: self._parse_function() or self._parse_var(upper=True) 2986 ), 2987 [], 2988 ): 2989 hints.extend(hint) 2990 2991 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2992 self.raise_error("Expected */ after HINT") 2993 2994 return self.expression(exp.Hint, expressions=hints) 2995 2996 return None 2997 2998 def _parse_into(self) -> t.Optional[exp.Into]: 2999 if not self._match(TokenType.INTO): 3000 return None 3001 3002 temp = self._match(TokenType.TEMPORARY) 3003 unlogged = self._match_text_seq("UNLOGGED") 3004 self._match(TokenType.TABLE) 3005 3006 return self.expression( 3007 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3008 ) 3009 3010 def _parse_from( 3011 self, joins: bool = False, skip_from_token: bool = False 3012 ) -> t.Optional[exp.From]: 3013 if not skip_from_token and not self._match(TokenType.FROM): 3014 return None 3015 3016 return self.expression( 3017 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3018 ) 3019 3020 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3021 return self.expression( 3022 exp.MatchRecognizeMeasure, 3023 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3024 this=self._parse_expression(), 3025 ) 3026 3027 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3028 if not self._match(TokenType.MATCH_RECOGNIZE): 3029 return None 3030 3031 self._match_l_paren() 3032 3033 partition = self._parse_partition_by() 3034 order = self._parse_order() 3035 3036 measures = ( 3037 self._parse_csv(self._parse_match_recognize_measure) 3038 if self._match_text_seq("MEASURES") 3039 else None 3040 ) 3041 3042 if self._match_text_seq("ONE", 
"ROW", "PER", "MATCH"): 3043 rows = exp.var("ONE ROW PER MATCH") 3044 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3045 text = "ALL ROWS PER MATCH" 3046 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3047 text += " SHOW EMPTY MATCHES" 3048 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3049 text += " OMIT EMPTY MATCHES" 3050 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3051 text += " WITH UNMATCHED ROWS" 3052 rows = exp.var(text) 3053 else: 3054 rows = None 3055 3056 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3057 text = "AFTER MATCH SKIP" 3058 if self._match_text_seq("PAST", "LAST", "ROW"): 3059 text += " PAST LAST ROW" 3060 elif self._match_text_seq("TO", "NEXT", "ROW"): 3061 text += " TO NEXT ROW" 3062 elif self._match_text_seq("TO", "FIRST"): 3063 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3064 elif self._match_text_seq("TO", "LAST"): 3065 text += f" TO LAST {self._advance_any().text}" # type: ignore 3066 after = exp.var(text) 3067 else: 3068 after = None 3069 3070 if self._match_text_seq("PATTERN"): 3071 self._match_l_paren() 3072 3073 if not self._curr: 3074 self.raise_error("Expecting )", self._curr) 3075 3076 paren = 1 3077 start = self._curr 3078 3079 while self._curr and paren > 0: 3080 if self._curr.token_type == TokenType.L_PAREN: 3081 paren += 1 3082 if self._curr.token_type == TokenType.R_PAREN: 3083 paren -= 1 3084 3085 end = self._prev 3086 self._advance() 3087 3088 if paren > 0: 3089 self.raise_error("Expecting )", self._curr) 3090 3091 pattern = exp.var(self._find_sql(start, end)) 3092 else: 3093 pattern = None 3094 3095 define = ( 3096 self._parse_csv(self._parse_name_as_expression) 3097 if self._match_text_seq("DEFINE") 3098 else None 3099 ) 3100 3101 self._match_r_paren() 3102 3103 return self.expression( 3104 exp.MatchRecognize, 3105 partition_by=partition, 3106 order=order, 3107 measures=measures, 3108 rows=rows, 3109 after=after, 3110 pattern=pattern, 3111 define=define, 3112 
alias=self._parse_table_alias(), 3113 ) 3114 3115 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3116 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3117 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3118 cross_apply = False 3119 3120 if cross_apply is not None: 3121 this = self._parse_select(table=True) 3122 view = None 3123 outer = None 3124 elif self._match(TokenType.LATERAL): 3125 this = self._parse_select(table=True) 3126 view = self._match(TokenType.VIEW) 3127 outer = self._match(TokenType.OUTER) 3128 else: 3129 return None 3130 3131 if not this: 3132 this = ( 3133 self._parse_unnest() 3134 or self._parse_function() 3135 or self._parse_id_var(any_token=False) 3136 ) 3137 3138 while self._match(TokenType.DOT): 3139 this = exp.Dot( 3140 this=this, 3141 expression=self._parse_function() or self._parse_id_var(any_token=False), 3142 ) 3143 3144 if view: 3145 table = self._parse_id_var(any_token=False) 3146 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3147 table_alias: t.Optional[exp.TableAlias] = self.expression( 3148 exp.TableAlias, this=table, columns=columns 3149 ) 3150 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3151 # We move the alias from the lateral's child node to the lateral itself 3152 table_alias = this.args["alias"].pop() 3153 else: 3154 table_alias = self._parse_table_alias() 3155 3156 return self.expression( 3157 exp.Lateral, 3158 this=this, 3159 view=view, 3160 outer=outer, 3161 alias=table_alias, 3162 cross_apply=cross_apply, 3163 ) 3164 3165 def _parse_join_parts( 3166 self, 3167 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3168 return ( 3169 self._match_set(self.JOIN_METHODS) and self._prev, 3170 self._match_set(self.JOIN_SIDES) and self._prev, 3171 self._match_set(self.JOIN_KINDS) and self._prev, 3172 ) 3173 3174 def _parse_join( 3175 self, skip_join_token: bool = False, parse_bracket: bool = False 3176 ) 
-> t.Optional[exp.Join]: 3177 if self._match(TokenType.COMMA): 3178 return self.expression(exp.Join, this=self._parse_table()) 3179 3180 index = self._index 3181 method, side, kind = self._parse_join_parts() 3182 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3183 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3184 3185 if not skip_join_token and not join: 3186 self._retreat(index) 3187 kind = None 3188 method = None 3189 side = None 3190 3191 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3192 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3193 3194 if not skip_join_token and not join and not outer_apply and not cross_apply: 3195 return None 3196 3197 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3198 3199 if method: 3200 kwargs["method"] = method.text 3201 if side: 3202 kwargs["side"] = side.text 3203 if kind: 3204 kwargs["kind"] = kind.text 3205 if hint: 3206 kwargs["hint"] = hint 3207 3208 if self._match(TokenType.MATCH_CONDITION): 3209 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3210 3211 if self._match(TokenType.ON): 3212 kwargs["on"] = self._parse_assignment() 3213 elif self._match(TokenType.USING): 3214 kwargs["using"] = self._parse_wrapped_id_vars() 3215 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3216 kind and kind.token_type == TokenType.CROSS 3217 ): 3218 index = self._index 3219 joins: t.Optional[list] = list(self._parse_joins()) 3220 3221 if joins and self._match(TokenType.ON): 3222 kwargs["on"] = self._parse_assignment() 3223 elif joins and self._match(TokenType.USING): 3224 kwargs["using"] = self._parse_wrapped_id_vars() 3225 else: 3226 joins = None 3227 self._retreat(index) 3228 3229 kwargs["this"].set("joins", joins if joins else None) 3230 3231 comments = [c for token in (method, side, kind) if token for c in token.comments] 3232 return 
self.expression(exp.Join, comments=comments, **kwargs) 3233 3234 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3235 this = self._parse_assignment() 3236 3237 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3238 return this 3239 3240 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3241 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3242 3243 return this 3244 3245 def _parse_index_params(self) -> exp.IndexParameters: 3246 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3247 3248 if self._match(TokenType.L_PAREN, advance=False): 3249 columns = self._parse_wrapped_csv(self._parse_with_operator) 3250 else: 3251 columns = None 3252 3253 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3254 partition_by = self._parse_partition_by() 3255 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3256 tablespace = ( 3257 self._parse_var(any_token=True) 3258 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3259 else None 3260 ) 3261 where = self._parse_where() 3262 3263 on = self._parse_field() if self._match(TokenType.ON) else None 3264 3265 return self.expression( 3266 exp.IndexParameters, 3267 using=using, 3268 columns=columns, 3269 include=include, 3270 partition_by=partition_by, 3271 where=where, 3272 with_storage=with_storage, 3273 tablespace=tablespace, 3274 on=on, 3275 ) 3276 3277 def _parse_index( 3278 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3279 ) -> t.Optional[exp.Index]: 3280 if index or anonymous: 3281 unique = None 3282 primary = None 3283 amp = None 3284 3285 self._match(TokenType.ON) 3286 self._match(TokenType.TABLE) # hive 3287 table = self._parse_table_parts(schema=True) 3288 else: 3289 unique = self._match(TokenType.UNIQUE) 3290 primary = self._match_text_seq("PRIMARY") 3291 amp = self._match_text_seq("AMP") 3292 3293 if not 
self._match(TokenType.INDEX): 3294 return None 3295 3296 index = self._parse_id_var() 3297 table = None 3298 3299 params = self._parse_index_params() 3300 3301 return self.expression( 3302 exp.Index, 3303 this=index, 3304 table=table, 3305 unique=unique, 3306 primary=primary, 3307 amp=amp, 3308 params=params, 3309 ) 3310 3311 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3312 hints: t.List[exp.Expression] = [] 3313 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3314 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3315 hints.append( 3316 self.expression( 3317 exp.WithTableHint, 3318 expressions=self._parse_csv( 3319 lambda: self._parse_function() or self._parse_var(any_token=True) 3320 ), 3321 ) 3322 ) 3323 self._match_r_paren() 3324 else: 3325 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3326 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3327 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3328 3329 self._match_set((TokenType.INDEX, TokenType.KEY)) 3330 if self._match(TokenType.FOR): 3331 hint.set("target", self._advance_any() and self._prev.text.upper()) 3332 3333 hint.set("expressions", self._parse_wrapped_id_vars()) 3334 hints.append(hint) 3335 3336 return hints or None 3337 3338 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3339 return ( 3340 (not schema and self._parse_function(optional_parens=False)) 3341 or self._parse_id_var(any_token=False) 3342 or self._parse_string_as_identifier() 3343 or self._parse_placeholder() 3344 ) 3345 3346 def _parse_table_parts( 3347 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3348 ) -> exp.Table: 3349 catalog = None 3350 db = None 3351 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3352 3353 while self._match(TokenType.DOT): 3354 if catalog: 3355 # This allows nesting the table in arbitrarily many dot expressions 
if needed 3356 table = self.expression( 3357 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3358 ) 3359 else: 3360 catalog = db 3361 db = table 3362 # "" used for tsql FROM a..b case 3363 table = self._parse_table_part(schema=schema) or "" 3364 3365 if ( 3366 wildcard 3367 and self._is_connected() 3368 and (isinstance(table, exp.Identifier) or not table) 3369 and self._match(TokenType.STAR) 3370 ): 3371 if isinstance(table, exp.Identifier): 3372 table.args["this"] += "*" 3373 else: 3374 table = exp.Identifier(this="*") 3375 3376 # We bubble up comments from the Identifier to the Table 3377 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3378 3379 if is_db_reference: 3380 catalog = db 3381 db = table 3382 table = None 3383 3384 if not table and not is_db_reference: 3385 self.raise_error(f"Expected table name but got {self._curr}") 3386 if not db and is_db_reference: 3387 self.raise_error(f"Expected database name but got {self._curr}") 3388 3389 table = self.expression( 3390 exp.Table, 3391 comments=comments, 3392 this=table, 3393 db=db, 3394 catalog=catalog, 3395 ) 3396 3397 changes = self._parse_changes() 3398 if changes: 3399 table.set("changes", changes) 3400 3401 at_before = self._parse_historical_data() 3402 if at_before: 3403 table.set("when", at_before) 3404 3405 pivots = self._parse_pivots() 3406 if pivots: 3407 table.set("pivots", pivots) 3408 3409 return table 3410 3411 def _parse_table( 3412 self, 3413 schema: bool = False, 3414 joins: bool = False, 3415 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3416 parse_bracket: bool = False, 3417 is_db_reference: bool = False, 3418 parse_partition: bool = False, 3419 ) -> t.Optional[exp.Expression]: 3420 lateral = self._parse_lateral() 3421 if lateral: 3422 return lateral 3423 3424 unnest = self._parse_unnest() 3425 if unnest: 3426 return unnest 3427 3428 values = self._parse_derived_table_values() 3429 if values: 3430 return values 3431 3432 
subquery = self._parse_select(table=True) 3433 if subquery: 3434 if not subquery.args.get("pivots"): 3435 subquery.set("pivots", self._parse_pivots()) 3436 return subquery 3437 3438 bracket = parse_bracket and self._parse_bracket(None) 3439 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3440 3441 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3442 self._parse_table 3443 ) 3444 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3445 3446 only = self._match(TokenType.ONLY) 3447 3448 this = t.cast( 3449 exp.Expression, 3450 bracket 3451 or rows_from 3452 or self._parse_bracket( 3453 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3454 ), 3455 ) 3456 3457 if only: 3458 this.set("only", only) 3459 3460 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3461 self._match_text_seq("*") 3462 3463 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3464 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3465 this.set("partition", self._parse_partition()) 3466 3467 if schema: 3468 return self._parse_schema(this=this) 3469 3470 version = self._parse_version() 3471 3472 if version: 3473 this.set("version", version) 3474 3475 if self.dialect.ALIAS_POST_TABLESAMPLE: 3476 table_sample = self._parse_table_sample() 3477 3478 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3479 if alias: 3480 this.set("alias", alias) 3481 3482 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3483 return self.expression( 3484 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3485 ) 3486 3487 this.set("hints", self._parse_table_hints()) 3488 3489 if not this.args.get("pivots"): 3490 this.set("pivots", self._parse_pivots()) 3491 3492 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3493 table_sample = self._parse_table_sample() 3494 3495 if 
table_sample: 3496 table_sample.set("this", this) 3497 this = table_sample 3498 3499 if joins: 3500 for join in self._parse_joins(): 3501 this.append("joins", join) 3502 3503 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3504 this.set("ordinality", True) 3505 this.set("alias", self._parse_table_alias()) 3506 3507 return this 3508 3509 def _parse_version(self) -> t.Optional[exp.Version]: 3510 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3511 this = "TIMESTAMP" 3512 elif self._match(TokenType.VERSION_SNAPSHOT): 3513 this = "VERSION" 3514 else: 3515 return None 3516 3517 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3518 kind = self._prev.text.upper() 3519 start = self._parse_bitwise() 3520 self._match_texts(("TO", "AND")) 3521 end = self._parse_bitwise() 3522 expression: t.Optional[exp.Expression] = self.expression( 3523 exp.Tuple, expressions=[start, end] 3524 ) 3525 elif self._match_text_seq("CONTAINED", "IN"): 3526 kind = "CONTAINED IN" 3527 expression = self.expression( 3528 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3529 ) 3530 elif self._match(TokenType.ALL): 3531 kind = "ALL" 3532 expression = None 3533 else: 3534 self._match_text_seq("AS", "OF") 3535 kind = "AS OF" 3536 expression = self._parse_type() 3537 3538 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3539 3540 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3541 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3542 index = self._index 3543 historical_data = None 3544 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3545 this = self._prev.text.upper() 3546 kind = ( 3547 self._match(TokenType.L_PAREN) 3548 and self._match_texts(self.HISTORICAL_DATA_KIND) 3549 and self._prev.text.upper() 3550 ) 3551 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3552 3553 if expression: 3554 self._match_r_paren() 3555 historical_data = self.expression( 3556 exp.HistoricalData, 
this=this, kind=kind, expression=expression 3557 ) 3558 else: 3559 self._retreat(index) 3560 3561 return historical_data 3562 3563 def _parse_changes(self) -> t.Optional[exp.Changes]: 3564 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3565 return None 3566 3567 information = self._parse_var(any_token=True) 3568 self._match_r_paren() 3569 3570 return self.expression( 3571 exp.Changes, 3572 information=information, 3573 at_before=self._parse_historical_data(), 3574 end=self._parse_historical_data(), 3575 ) 3576 3577 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3578 if not self._match(TokenType.UNNEST): 3579 return None 3580 3581 expressions = self._parse_wrapped_csv(self._parse_equality) 3582 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3583 3584 alias = self._parse_table_alias() if with_alias else None 3585 3586 if alias: 3587 if self.dialect.UNNEST_COLUMN_ONLY: 3588 if alias.args.get("columns"): 3589 self.raise_error("Unexpected extra column alias in unnest.") 3590 3591 alias.set("columns", [alias.this]) 3592 alias.set("this", None) 3593 3594 columns = alias.args.get("columns") or [] 3595 if offset and len(expressions) < len(columns): 3596 offset = columns.pop() 3597 3598 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3599 self._match(TokenType.ALIAS) 3600 offset = self._parse_id_var( 3601 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3602 ) or exp.to_identifier("offset") 3603 3604 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3605 3606 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3607 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3608 if not is_derived and not self._match_text_seq("VALUES"): 3609 return None 3610 3611 expressions = self._parse_csv(self._parse_value) 3612 alias = self._parse_table_alias() 3613 3614 if is_derived: 3615 self._match_r_paren() 3616 3617 return 
self.expression( 3618 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3619 ) 3620 3621 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3622 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3623 as_modifier and self._match_text_seq("USING", "SAMPLE") 3624 ): 3625 return None 3626 3627 bucket_numerator = None 3628 bucket_denominator = None 3629 bucket_field = None 3630 percent = None 3631 size = None 3632 seed = None 3633 3634 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3635 matched_l_paren = self._match(TokenType.L_PAREN) 3636 3637 if self.TABLESAMPLE_CSV: 3638 num = None 3639 expressions = self._parse_csv(self._parse_primary) 3640 else: 3641 expressions = None 3642 num = ( 3643 self._parse_factor() 3644 if self._match(TokenType.NUMBER, advance=False) 3645 else self._parse_primary() or self._parse_placeholder() 3646 ) 3647 3648 if self._match_text_seq("BUCKET"): 3649 bucket_numerator = self._parse_number() 3650 self._match_text_seq("OUT", "OF") 3651 bucket_denominator = bucket_denominator = self._parse_number() 3652 self._match(TokenType.ON) 3653 bucket_field = self._parse_field() 3654 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3655 percent = num 3656 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3657 size = num 3658 else: 3659 percent = num 3660 3661 if matched_l_paren: 3662 self._match_r_paren() 3663 3664 if self._match(TokenType.L_PAREN): 3665 method = self._parse_var(upper=True) 3666 seed = self._match(TokenType.COMMA) and self._parse_number() 3667 self._match_r_paren() 3668 elif self._match_texts(("SEED", "REPEATABLE")): 3669 seed = self._parse_wrapped(self._parse_number) 3670 3671 if not method and self.DEFAULT_SAMPLING_METHOD: 3672 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3673 3674 return self.expression( 3675 exp.TableSample, 3676 expressions=expressions, 3677 method=method, 3678 
bucket_numerator=bucket_numerator, 3679 bucket_denominator=bucket_denominator, 3680 bucket_field=bucket_field, 3681 percent=percent, 3682 size=size, 3683 seed=seed, 3684 ) 3685 3686 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3687 return list(iter(self._parse_pivot, None)) or None 3688 3689 def _parse_joins(self) -> t.Iterator[exp.Join]: 3690 return iter(self._parse_join, None) 3691 3692 # https://duckdb.org/docs/sql/statements/pivot 3693 def _parse_simplified_pivot(self) -> exp.Pivot: 3694 def _parse_on() -> t.Optional[exp.Expression]: 3695 this = self._parse_bitwise() 3696 return self._parse_in(this) if self._match(TokenType.IN) else this 3697 3698 this = self._parse_table() 3699 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3700 using = self._match(TokenType.USING) and self._parse_csv( 3701 lambda: self._parse_alias(self._parse_function()) 3702 ) 3703 group = self._parse_group() 3704 return self.expression( 3705 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3706 ) 3707 3708 def _parse_pivot_in(self) -> exp.In: 3709 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3710 this = self._parse_assignment() 3711 3712 self._match(TokenType.ALIAS) 3713 alias = self._parse_field() 3714 if alias: 3715 return self.expression(exp.PivotAlias, this=this, alias=alias) 3716 3717 return this 3718 3719 value = self._parse_column() 3720 3721 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3722 self.raise_error("Expecting IN (") 3723 3724 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3725 3726 self._match_r_paren() 3727 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3728 3729 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3730 index = self._index 3731 include_nulls = None 3732 3733 if self._match(TokenType.PIVOT): 3734 unpivot = False 3735 elif self._match(TokenType.UNPIVOT): 3736 unpivot = True 3737 3738 # 
https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3739 if self._match_text_seq("INCLUDE", "NULLS"): 3740 include_nulls = True 3741 elif self._match_text_seq("EXCLUDE", "NULLS"): 3742 include_nulls = False 3743 else: 3744 return None 3745 3746 expressions = [] 3747 3748 if not self._match(TokenType.L_PAREN): 3749 self._retreat(index) 3750 return None 3751 3752 if unpivot: 3753 expressions = self._parse_csv(self._parse_column) 3754 else: 3755 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3756 3757 if not expressions: 3758 self.raise_error("Failed to parse PIVOT's aggregation list") 3759 3760 if not self._match(TokenType.FOR): 3761 self.raise_error("Expecting FOR") 3762 3763 field = self._parse_pivot_in() 3764 3765 self._match_r_paren() 3766 3767 pivot = self.expression( 3768 exp.Pivot, 3769 expressions=expressions, 3770 field=field, 3771 unpivot=unpivot, 3772 include_nulls=include_nulls, 3773 ) 3774 3775 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3776 pivot.set("alias", self._parse_table_alias()) 3777 3778 if not unpivot: 3779 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3780 3781 columns: t.List[exp.Expression] = [] 3782 for fld in pivot.args["field"].expressions: 3783 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3784 for name in names: 3785 if self.PREFIXED_PIVOT_COLUMNS: 3786 name = f"{name}_{field_name}" if name else field_name 3787 else: 3788 name = f"{field_name}_{name}" if name else field_name 3789 3790 columns.append(exp.to_identifier(name)) 3791 3792 pivot.set("columns", columns) 3793 3794 return pivot 3795 3796 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3797 return [agg.alias for agg in aggregations] 3798 3799 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3800 if not skip_where_token and not 
self._match(TokenType.PREWHERE): 3801 return None 3802 3803 return self.expression( 3804 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3805 ) 3806 3807 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3808 if not skip_where_token and not self._match(TokenType.WHERE): 3809 return None 3810 3811 return self.expression( 3812 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3813 ) 3814 3815 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3816 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3817 return None 3818 3819 elements: t.Dict[str, t.Any] = defaultdict(list) 3820 3821 if self._match(TokenType.ALL): 3822 elements["all"] = True 3823 elif self._match(TokenType.DISTINCT): 3824 elements["all"] = False 3825 3826 while True: 3827 expressions = self._parse_csv( 3828 lambda: None 3829 if self._match(TokenType.ROLLUP, advance=False) 3830 else self._parse_assignment() 3831 ) 3832 if expressions: 3833 elements["expressions"].extend(expressions) 3834 3835 grouping_sets = self._parse_grouping_sets() 3836 if grouping_sets: 3837 elements["grouping_sets"].extend(grouping_sets) 3838 3839 rollup = None 3840 cube = None 3841 totals = None 3842 3843 index = self._index 3844 with_ = self._match(TokenType.WITH) 3845 if self._match(TokenType.ROLLUP): 3846 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3847 elements["rollup"].extend(ensure_list(rollup)) 3848 3849 if self._match(TokenType.CUBE): 3850 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3851 elements["cube"].extend(ensure_list(cube)) 3852 3853 if self._match_text_seq("TOTALS"): 3854 totals = True 3855 elements["totals"] = True # type: ignore 3856 3857 if not (grouping_sets or rollup or cube or totals): 3858 if with_: 3859 self._retreat(index) 3860 break 3861 3862 return self.expression(exp.Group, **elements) # type: ignore 3863 3864 def _parse_grouping_sets(self) -> 
t.Optional[t.List[exp.Expression]]: 3865 if not self._match(TokenType.GROUPING_SETS): 3866 return None 3867 3868 return self._parse_wrapped_csv(self._parse_grouping_set) 3869 3870 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3871 if self._match(TokenType.L_PAREN): 3872 grouping_set = self._parse_csv(self._parse_column) 3873 self._match_r_paren() 3874 return self.expression(exp.Tuple, expressions=grouping_set) 3875 3876 return self._parse_column() 3877 3878 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3879 if not skip_having_token and not self._match(TokenType.HAVING): 3880 return None 3881 return self.expression(exp.Having, this=self._parse_assignment()) 3882 3883 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3884 if not self._match(TokenType.QUALIFY): 3885 return None 3886 return self.expression(exp.Qualify, this=self._parse_assignment()) 3887 3888 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3889 if skip_start_token: 3890 start = None 3891 elif self._match(TokenType.START_WITH): 3892 start = self._parse_assignment() 3893 else: 3894 return None 3895 3896 self._match(TokenType.CONNECT_BY) 3897 nocycle = self._match_text_seq("NOCYCLE") 3898 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3899 exp.Prior, this=self._parse_bitwise() 3900 ) 3901 connect = self._parse_assignment() 3902 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3903 3904 if not start and self._match(TokenType.START_WITH): 3905 start = self._parse_assignment() 3906 3907 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3908 3909 def _parse_name_as_expression(self) -> exp.Alias: 3910 return self.expression( 3911 exp.Alias, 3912 alias=self._parse_id_var(any_token=True), 3913 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3914 ) 3915 3916 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3917 if 
self._match_text_seq("INTERPOLATE"): 3918 return self._parse_wrapped_csv(self._parse_name_as_expression) 3919 return None 3920 3921 def _parse_order( 3922 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3923 ) -> t.Optional[exp.Expression]: 3924 siblings = None 3925 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3926 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3927 return this 3928 3929 siblings = True 3930 3931 return self.expression( 3932 exp.Order, 3933 this=this, 3934 expressions=self._parse_csv(self._parse_ordered), 3935 interpolate=self._parse_interpolate(), 3936 siblings=siblings, 3937 ) 3938 3939 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3940 if not self._match(token): 3941 return None 3942 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3943 3944 def _parse_ordered( 3945 self, parse_method: t.Optional[t.Callable] = None 3946 ) -> t.Optional[exp.Ordered]: 3947 this = parse_method() if parse_method else self._parse_assignment() 3948 if not this: 3949 return None 3950 3951 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 3952 this = exp.var("ALL") 3953 3954 asc = self._match(TokenType.ASC) 3955 desc = self._match(TokenType.DESC) or (asc and False) 3956 3957 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3958 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3959 3960 nulls_first = is_nulls_first or False 3961 explicitly_null_ordered = is_nulls_first or is_nulls_last 3962 3963 if ( 3964 not explicitly_null_ordered 3965 and ( 3966 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3967 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3968 ) 3969 and self.dialect.NULL_ORDERING != "nulls_are_last" 3970 ): 3971 nulls_first = True 3972 3973 if self._match_text_seq("WITH", "FILL"): 3974 with_fill = self.expression( 3975 exp.WithFill, 3976 **{ # type: ignore 3977 "from": 
def _parse_limit(
    self,
    this: t.Optional[exp.Expression] = None,
    top: bool = False,
    skip_limit_token: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a row-limiting clause: LIMIT, TOP (when `top` is True), or FETCH.

    Args:
        this: The expression the limit attaches to (stored in the Limit node).
        top: Parse T-SQL style TOP instead of LIMIT.
        skip_limit_token: Assume the LIMIT/TOP keyword was already consumed.

    Returns:
        An `exp.Limit` or `exp.Fetch` node, or `this` unchanged if no clause matched.
    """
    if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
        comments = self._prev_comments
        if top:
            # TOP may parenthesize its argument: TOP (expr); bare TOP only takes a number
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_term() if limit_paren else self._parse_number()

            if limit_paren:
                self._match_r_paren()
        else:
            expression = self._parse_term()

        # MySQL-style `LIMIT offset, count`: the first term was the offset
        if self._match(TokenType.COMMA):
            offset = expression
            expression = self._parse_term()
        else:
            offset = None

        limit_exp = self.expression(
            exp.Limit,
            this=this,
            expression=expression,
            offset=offset,
            comments=comments,
            expressions=self._parse_limit_by(),
        )

        return limit_exp

    if self._match(TokenType.FETCH):
        # FETCH {FIRST|NEXT} <count> [PERCENT] {ROW|ROWS} [ONLY | WITH TIES]
        direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
        direction = self._prev.text.upper() if direction else "FIRST"

        count = self._parse_field(tokens=self.FETCH_TOKENS)
        percent = self._match(TokenType.PERCENT)

        self._match_set((TokenType.ROW, TokenType.ROWS))

        only = self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")

        # ONLY and WITH TIES are mutually exclusive terminators
        if only and with_ties:
            self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

        return self.expression(
            exp.Fetch,
            direction=direction,
            count=count,
            percent=percent,
            with_ties=with_ties,
        )

    return this
def _parse_locks(self) -> t.List[exp.Lock]:
    """Parse trailing row-locking clauses: FOR UPDATE, FOR SHARE, LOCK IN SHARE MODE.

    Consumes as many consecutive locking clauses as are present and returns
    one `exp.Lock` node per clause (possibly an empty list).
    """
    result: t.List[exp.Lock] = []

    while True:
        if self._match_text_seq("FOR", "UPDATE"):
            is_update = True
        elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
            "LOCK", "IN", "SHARE", "MODE"
        ):
            is_update = False
        else:
            # No further locking clause: we're done
            return result

        # Optional `OF <table, ...>` restricts the lock to specific tables
        tables = (
            self._parse_csv(lambda: self._parse_table(schema=True))
            if self._match_text_seq("OF")
            else None
        )

        # NOWAIT -> True, WAIT <n> -> the wait expression, SKIP LOCKED -> False
        wait: t.Optional[bool | exp.Expression] = None
        if self._match_text_seq("NOWAIT"):
            wait = True
        elif self._match_text_seq("WAIT"):
            wait = self._parse_primary()
        elif self._match_text_seq("SKIP", "LOCKED"):
            wait = False

        result.append(
            self.expression(exp.Lock, update=is_update, expressions=tables, wait=wait)
        )
def _parse_assignment(self) -> t.Optional[exp.Expression]:
    """Parse an expression at assignment-operator precedence.

    Assignment operators are right-associative, which the recursive call on
    the right-hand side implements naturally.
    """
    result = self._parse_disjunction()

    while self._match_set(self.ASSIGNMENT):
        klass = self.ASSIGNMENT[self._prev.token_type]
        result = self.expression(
            klass,
            this=result,
            comments=self._prev_comments,
            expression=self._parse_assignment(),
        )

    return result
def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse the tail of an IS predicate: IS [NOT] DISTINCT FROM / NULL / boolean.

    Retreats and returns None if nothing parseable follows IS, so the caller
    can treat the IS token as something else.
    """
    # Position of the IS token itself, so we can back out entirely on failure
    is_index = self._index - 1
    negated = self._match(TokenType.NOT)

    if self._match_text_seq("DISTINCT", "FROM"):
        # IS NOT DISTINCT FROM <=> null-safe equality; IS DISTINCT FROM <=> null-safe inequality
        op = exp.NullSafeEQ if negated else exp.NullSafeNEQ
        return self.expression(op, this=this, expression=self._parse_bitwise())

    operand = self._parse_null() or self._parse_boolean()
    if not operand:
        self._retreat(is_index)
        return None

    result = self.expression(exp.Is, this=this, expression=operand)
    if negated:
        result = self.expression(exp.Not, this=result)

    return result
def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
    """Parse the `<low> AND <high>` tail of a BETWEEN predicate."""
    lower_bound = self._parse_bitwise()
    # The AND here separates the bounds; it is not a logical conjunction
    self._match(TokenType.AND)
    upper_bound = self._parse_bitwise()

    return self.expression(exp.Between, this=this, low=lower_bound, high=upper_bound)
def _parse_bitwise(self) -> t.Optional[exp.Expression]:
    """Parse a bitwise-precedence expression.

    Handles the BITWISE operator table, dialect-dependent `||` concatenation,
    the `??` coalesce shorthand, and `<<` / `>>` shifts (which arrive as
    token pairs rather than single tokens).
    """
    node = self._parse_term()

    while True:
        if self._match_set(self.BITWISE):
            node = self.expression(
                self.BITWISE[self._prev.token_type],
                this=node,
                expression=self._parse_term(),
            )
        elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
            # In these dialects `||` is string concatenation, not logical OR
            node = self.expression(
                exp.DPipe,
                this=node,
                expression=self._parse_term(),
                safe=not self.dialect.STRICT_STRING_CONCAT,
            )
        elif self._match(TokenType.DQMARK):
            # `x ?? y` is shorthand for COALESCE(x, y)
            node = self.expression(exp.Coalesce, this=node, expressions=self._parse_term())
        elif self._match_pair(TokenType.LT, TokenType.LT):
            node = self.expression(
                exp.BitwiseLeftShift, this=node, expression=self._parse_term()
            )
        elif self._match_pair(TokenType.GT, TokenType.GT):
            node = self.expression(
                exp.BitwiseRightShift, this=node, expression=self._parse_term()
            )
        else:
            return node
def _parse_unary(self) -> t.Optional[exp.Expression]:
    """Parse a unary-precedence expression via the UNARY_PARSERS dispatch table."""
    if not self._match_set(self.UNARY_PARSERS):
        # No unary operator: parse a typed expression, optionally wrapped
        # in AT TIME ZONE.
        return self._parse_at_time_zone(self._parse_type())

    parser = self.UNARY_PARSERS[self._prev.token_type]
    return parser(self)
4365 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4366 if isinstance(data_type, exp.Cast): 4367 # This constructor can contain ops directly after it, for instance struct unnesting: 4368 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4369 return self._parse_column_ops(data_type) 4370 4371 if data_type: 4372 index2 = self._index 4373 this = self._parse_primary() 4374 4375 if isinstance(this, exp.Literal): 4376 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4377 if parser: 4378 return parser(self, this, data_type) 4379 4380 return self.expression(exp.Cast, this=this, to=data_type) 4381 4382 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4383 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4384 # 4385 # If the index difference here is greater than 1, that means the parser itself must have 4386 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4387 # 4388 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4389 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4390 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4391 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4392 # 4393 # In these cases, we don't really want to return the converted type, but instead retreat 4394 # and try to parse a Column or Identifier in the section below. 
def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
    """Parse one parameter of a parameterized type, e.g. the 38 in DECIMAL(38, 0)."""
    param = self._parse_type()
    if not param:
        return None

    # A bare, table-less column here is really a keyword-like specifier
    # (e.g. MAX in VARCHAR(MAX)); normalize it to an uppercase variable.
    if isinstance(param, exp.Column) and not param.table:
        param = exp.var(param.name.upper())

    return self.expression(
        exp.DataTypeParam, this=param, expression=self._parse_var(any_token=True)
    )
return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4459 4460 # https://materialize.com/docs/sql/types/map/ 4461 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4462 key_type = self._parse_types( 4463 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4464 ) 4465 if not self._match(TokenType.FARROW): 4466 self._retreat(index) 4467 return None 4468 4469 value_type = self._parse_types( 4470 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4471 ) 4472 if not self._match(TokenType.R_BRACKET): 4473 self._retreat(index) 4474 return None 4475 4476 return exp.DataType( 4477 this=exp.DataType.Type.MAP, 4478 expressions=[key_type, value_type], 4479 nested=True, 4480 prefix=prefix, 4481 ) 4482 4483 nested = type_token in self.NESTED_TYPE_TOKENS 4484 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4485 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4486 expressions = None 4487 maybe_func = False 4488 4489 if self._match(TokenType.L_PAREN): 4490 if is_struct: 4491 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4492 elif nested: 4493 expressions = self._parse_csv( 4494 lambda: self._parse_types( 4495 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4496 ) 4497 ) 4498 elif type_token in self.ENUM_TYPE_TOKENS: 4499 expressions = self._parse_csv(self._parse_equality) 4500 elif is_aggregate: 4501 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4502 any_token=False, tokens=(TokenType.VAR,) 4503 ) 4504 if not func_or_ident or not self._match(TokenType.COMMA): 4505 return None 4506 expressions = self._parse_csv( 4507 lambda: self._parse_types( 4508 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4509 ) 4510 ) 4511 expressions.insert(0, func_or_ident) 4512 else: 4513 expressions = self._parse_csv(self._parse_type_size) 4514 4515 # 
https://docs.snowflake.com/en/sql-reference/data-types-vector 4516 if type_token == TokenType.VECTOR and len(expressions) == 2: 4517 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4518 4519 if not expressions or not self._match(TokenType.R_PAREN): 4520 self._retreat(index) 4521 return None 4522 4523 maybe_func = True 4524 4525 values: t.Optional[t.List[exp.Expression]] = None 4526 4527 if nested and self._match(TokenType.LT): 4528 if is_struct: 4529 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4530 else: 4531 expressions = self._parse_csv( 4532 lambda: self._parse_types( 4533 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4534 ) 4535 ) 4536 4537 if not self._match(TokenType.GT): 4538 self.raise_error("Expecting >") 4539 4540 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4541 values = self._parse_csv(self._parse_assignment) 4542 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4543 4544 if type_token in self.TIMESTAMPS: 4545 if self._match_text_seq("WITH", "TIME", "ZONE"): 4546 maybe_func = False 4547 tz_type = ( 4548 exp.DataType.Type.TIMETZ 4549 if type_token in self.TIMES 4550 else exp.DataType.Type.TIMESTAMPTZ 4551 ) 4552 this = exp.DataType(this=tz_type, expressions=expressions) 4553 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4554 maybe_func = False 4555 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4556 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4557 maybe_func = False 4558 elif type_token == TokenType.INTERVAL: 4559 unit = self._parse_var(upper=True) 4560 if unit: 4561 if self._match_text_seq("TO"): 4562 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4563 4564 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4565 else: 4566 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4567 4568 if 
maybe_func and check_func: 4569 index2 = self._index 4570 peek = self._parse_string() 4571 4572 if not peek: 4573 self._retreat(index) 4574 return None 4575 4576 self._retreat(index2) 4577 4578 if not this: 4579 if self._match_text_seq("UNSIGNED"): 4580 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4581 if not unsigned_type_token: 4582 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4583 4584 type_token = unsigned_type_token or type_token 4585 4586 this = exp.DataType( 4587 this=exp.DataType.Type[type_token.value], 4588 expressions=expressions, 4589 nested=nested, 4590 prefix=prefix, 4591 ) 4592 4593 # Empty arrays/structs are allowed 4594 if values is not None: 4595 cls = exp.Struct if is_struct else exp.Array 4596 this = exp.cast(cls(expressions=values), this, copy=False) 4597 4598 elif expressions: 4599 this.set("expressions", expressions) 4600 4601 # https://materialize.com/docs/sql/types/list/#type-name 4602 while self._match(TokenType.LIST): 4603 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4604 4605 index = self._index 4606 4607 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4608 matched_array = self._match(TokenType.ARRAY) 4609 4610 while self._curr: 4611 matched_l_bracket = self._match(TokenType.L_BRACKET) 4612 if not matched_l_bracket and not matched_array: 4613 break 4614 4615 matched_array = False 4616 values = self._parse_csv(self._parse_assignment) or None 4617 if values and not schema: 4618 self._retreat(index) 4619 break 4620 4621 this = exp.DataType( 4622 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4623 ) 4624 self._match(TokenType.R_BRACKET) 4625 4626 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4627 converter = self.TYPE_CONVERTERS.get(this.this) 4628 if converter: 4629 this = converter(t.cast(exp.DataType, this)) 4630 4631 return this 4632 4633 def _parse_struct_types(self, 
def _parse_column(self) -> t.Optional[exp.Expression]:
    """Parse a column reference plus any trailing column operators (dots, casts, etc.)."""
    ref = self._parse_column_reference()
    if ref:
        result = self._parse_column_ops(ref)
    else:
        result = self._parse_bracket(ref)

    # Oracle-style (+) outer-join markers attach directly to the column
    if result and self.dialect.SUPPORTS_COLUMN_JOIN_MARKS:
        result.set("join_mark", self._match(TokenType.JOIN_MARKER))

    return result
def _parse_colon_as_variant_extract(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Parse Snowflake/Databricks `col:path.to.key` VARIANT extraction.

    Collects the colon-separated path segments into a single JSON path and
    wraps `this` in an `exp.JSONExtract`, re-applying any `::` casts that
    were parsed as part of a path segment so they end up outside the
    extraction (`:` binds tighter than `::`).
    """
    casts = []
    json_path = []

    while self._match(TokenType.COLON):
        # Token index of the first token of this path segment, used below to
        # recover the segment's exact SQL text
        start_index = self._index

        # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
        path = self._parse_column_ops(
            self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
        )

        # The cast :: operator has a lower precedence than the extraction operator :, so
        # we rearrange the AST appropriately to avoid casting the JSON path
        while isinstance(path, exp.Cast):
            casts.append(path.to)
            path = path.this

        if casts:
            # A cast was peeled off, so the path segment's text ends just
            # before the first :: token after start_index
            dcolon_offset = next(
                i
                for i, t in enumerate(self._tokens[start_index:])
                if t.token_type == TokenType.DCOLON
            )
            end_token = self._tokens[start_index + dcolon_offset - 1]
        else:
            end_token = self._prev

        if path:
            json_path.append(self._find_sql(self._tokens[start_index], end_token))

    # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
    # Databricks transforms it back to the colon/dot notation
    if json_path:
        this = self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
            variant_extract=True,
        )

    # Re-apply the peeled casts outermost-last so nesting order is preserved
    while casts:
        this = self.expression(exp.Cast, this=this, to=casts.pop())

    return this
def _parse_primary(self) -> t.Optional[exp.Expression]:
    """Parse a primary expression: a literal, a leading-dot number, or a
    parenthesized expression / subquery / tuple.

    Returns None when the current token starts none of these, letting the
    caller try other productions.
    """
    if self._match_set(self.PRIMARY_PARSERS):
        token_type = self._prev.token_type
        primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

        if token_type == TokenType.STRING:
            # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b')
            expressions = [primary]
            while self._match(TokenType.STRING):
                expressions.append(exp.Literal.string(self._prev.text))

            if len(expressions) > 1:
                return self.expression(exp.Concat, expressions=expressions)

        return primary

    # A number written without a leading zero, e.g. `.25`
    if self._match_pair(TokenType.DOT, TokenType.NUMBER):
        return exp.Literal.number(f"0.{self._prev.text}")

    if self._match(TokenType.L_PAREN):
        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        # Decide what the parenthesized content is, in priority order:
        # empty parens -> Tuple; bare query -> subquery; nested subquery ->
        # possibly a set operation; comma-separated -> Tuple; else plain Paren
        if not this and self._match(TokenType.R_PAREN, advance=False):
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, exp.Subquery):
            this = self._parse_subquery(
                this=self._parse_set_operations(this), parse_alias=False
            )
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            # Carry the comments that preceded the opening paren onto the node
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    return None
def _parse_function_call(
    self,
    functions: t.Optional[t.Dict[str, t.Callable]] = None,
    anonymous: bool = False,
    optional_parens: bool = True,
    any_token: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a function invocation at the current token.

    Dispatch order: no-paren function parsers, no-paren builtin functions,
    special-cased FUNCTION_PARSERS, subquery predicates (e.g. EXISTS(SELECT ...)),
    known functions from `functions` (defaults to self.FUNCTIONS), and finally
    an `exp.Anonymous` fallback.

    Args:
        functions: Name -> builder mapping; defaults to self.FUNCTIONS.
        anonymous: Force an exp.Anonymous node even for known functions.
        optional_parens: Allow paren-less forms (e.g. CURRENT_DATE).
        any_token: Accept any non-reserved token as a function name.

    Returns:
        The parsed function expression (possibly wrapped in a window), or None.
    """
    if not self._curr:
        return None

    comments = self._curr.comments
    token_type = self._curr.token_type
    this = self._curr.text
    upper = this.upper()

    # Functions that are parsed without parentheses via a custom parser
    parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
    if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
        self._advance()
        return self._parse_window(parser(self))

    if not self._next or self._next.token_type != TokenType.L_PAREN:
        # No parens follow: only paren-less builtins can match here
        if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
            self._advance()
            return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

        return None

    if any_token:
        if token_type in self.RESERVED_TOKENS:
            return None
    elif token_type not in self.FUNC_TOKENS:
        return None

    # Consume the function name and the opening paren
    self._advance(2)

    parser = self.FUNCTION_PARSERS.get(upper)
    if parser and not anonymous:
        this = parser(self)
    else:
        subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

        if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
            # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...)
            this = self.expression(subquery_predicate, this=self._parse_select())
            self._match_r_paren()
            return this

        if functions is None:
            functions = self.FUNCTIONS

        function = functions.get(upper)

        alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
        args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

        if alias:
            # Convert aliased args (k AS v / k = v) into PropertyEQ nodes
            args = self._kv_to_prop_eq(args)

        if function and not anonymous:
            # Builders may optionally accept the dialect as a keyword argument
            if "dialect" in function.__code__.co_varnames:
                func = function(args, dialect=self.dialect)
            else:
                func = function(args)

            func = self.validate_expression(func, args)
            if not self.dialect.NORMALIZE_FUNCTIONS:
                # Preserve the original (un-normalized) spelling of the name
                func.meta["name"] = this

            this = func
        else:
            if token_type == TokenType.IDENTIFIER:
                this = exp.Identifier(this=this, quoted=True)
            this = self.expression(exp.Anonymous, this=this, expressions=args)

    if isinstance(this, exp.Expression):
        this.add_comments(comments)

    self._match_r_paren(this)
    return self._parse_window(this)
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a character-set introducer (e.g. _utf8'abc'); falls back to a
        plain identifier when no literal follows."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda such as (x, y) -> x + y, or fall back to an ordinary
        (possibly DISTINCT / ordered / limited) function argument expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse a regular argument instead
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column / constraint list into an exp.Schema, or
        return `this` unchanged when no such list follows."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type and constraints that may follow a column name, producing
        an exp.ColumnDef (or returning `this` untouched when neither is present)."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column (ALIAS / MATERIALIZED), optionally PERSISTED / NOT NULL
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT-style options; explicit START / INCREMENT values
        upgrade the result to a GENERATED AS IDENTITY constraint."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            # Roll back the token consumed before dispatching here
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint with either a wrapped list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW START/END [HIDDEN] (system-versioned tables)
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5175 5176 identity = self._match_text_seq("IDENTITY") 5177 5178 if self._match(TokenType.L_PAREN): 5179 if self._match(TokenType.START_WITH): 5180 this.set("start", self._parse_bitwise()) 5181 if self._match_text_seq("INCREMENT", "BY"): 5182 this.set("increment", self._parse_bitwise()) 5183 if self._match_text_seq("MINVALUE"): 5184 this.set("minvalue", self._parse_bitwise()) 5185 if self._match_text_seq("MAXVALUE"): 5186 this.set("maxvalue", self._parse_bitwise()) 5187 5188 if self._match_text_seq("CYCLE"): 5189 this.set("cycle", True) 5190 elif self._match_text_seq("NO", "CYCLE"): 5191 this.set("cycle", False) 5192 5193 if not identity: 5194 this.set("expression", self._parse_range()) 5195 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5196 args = self._parse_csv(self._parse_bitwise) 5197 this.set("start", seq_get(args, 0)) 5198 this.set("increment", seq_get(args, 1)) 5199 5200 self._match_r_paren() 5201 5202 return this 5203 5204 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5205 self._match_text_seq("LENGTH") 5206 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5207 5208 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5209 if self._match_text_seq("NULL"): 5210 return self.expression(exp.NotNullColumnConstraint) 5211 if self._match_text_seq("CASESPECIFIC"): 5212 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5213 if self._match_text_seq("FOR", "REPLICATION"): 5214 return self.expression(exp.NotForReplicationColumnConstraint) 5215 return None 5216 5217 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5218 if self._match(TokenType.CONSTRAINT): 5219 this = self._parse_id_var() 5220 else: 5221 this = None 5222 5223 if self._match_texts(self.CONSTRAINT_PARSERS): 5224 return self.expression( 5225 exp.ColumnConstraint, 5226 this=this, 5227 
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a schema-level constraint: unnamed constraints are dispatched
        directly, while CONSTRAINT <name> collects all trailing unnamed ones."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function
        calls) until none can be parsed."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single unnamed constraint whose leading keyword is in
        `constraints` (defaults to the keys of CONSTRAINT_PARSERS)."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with optional NULLS NOT DISTINCT,
        USING <index type> and ON CONFLICT clauses."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options (ON <event> <action> plus the
        keywords in KEY_CONSTRAINT_OPTIONS) into plain strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES <table> [(...)] clause; `match` controls whether the
        REFERENCES keyword itself is required."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        # schema=True lets _parse_table capture the referenced column list
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint; referential actions are stored under
        the lower-cased event name ("delete" / "update")."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (<start>, <end>); rolls back one token
        when the expected keyword is absent."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY either as a column constraint or, when a column list
        follows (or in_props is set), as a table-level key with options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse [...] / {...} following `this`: struct literals, array literals,
        typed array constructors, or subscript access; recurses to handle chains."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = self.expression(exp.Array, expressions=expressions)
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return self.expression(constructor_type, expressions=expressions)

            # Subscript access: normalize the index per the dialect's base offset
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                # "ELSE interval END" was mis-parsed as an interval named END; undo it
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A statement-level IF at the very start is kept as a raw command
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(<part> FROM <expr>); a comma is accepted in place of FROM."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL(TABLE <t>, ...) into an exp.GapFill node."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the body of CAST / TRY_CAST(<expr> AS <type> [FORMAT <fmt>]).

        Args:
            strict: build exp.Cast when True, exp.TryCast otherwise.
            safe: forwarded to the resulting node.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST with FORMAT into a temporal type becomes STR_TO_DATE / STR_TO_TIME
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style arguments into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it may itself be NULL at runtime, so
                # also match when both sides are NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one [KEY] <key> VALUE <value> (or key: value) pair."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments: key-value pairs plus the
        optional NULL/ABSENT, UNIQUE KEYS, RETURNING and ENCODING clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (...) definition list for JSON_TABLE-style calls."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(<expr> [, <path>] [ON ERROR/EMPTY handling] COLUMNS (...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH (<cols>) AGAINST (<expr> [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) column list
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION / LOCATE-style arguments; `haystack_first` reflects the
        dialect's argument order for the comma-separated form."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT(MODEL <m>, TABLE <t> [, <params struct>])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR without a FROM clause: default the start position to 1
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # In the FROM form the pattern comes first; swap so `this` is the target
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls when the keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing HAVING MAX/MIN <column> qualifier into exp.HavingMax."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the clauses that may trail a function call: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS, and the OVER window specification itself."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
5928 5929 # Oracle allows both formats 5930 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5931 # and Snowflake chose to do the same for familiarity 5932 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5933 if isinstance(this, exp.AggFunc): 5934 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5935 5936 if ignore_respect and ignore_respect is not this: 5937 ignore_respect.replace(ignore_respect.this) 5938 this = self.expression(ignore_respect.__class__, this=this) 5939 5940 this = self._parse_respect_or_ignore_nulls(this) 5941 5942 # bigquery select from window x AS (partition by ...) 5943 if alias: 5944 over = None 5945 self._match(TokenType.ALIAS) 5946 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5947 return this 5948 else: 5949 over = self._prev.text.upper() 5950 5951 if comments and isinstance(func, exp.Expression): 5952 func.pop_comments() 5953 5954 if not self._match(TokenType.L_PAREN): 5955 return self.expression( 5956 exp.Window, 5957 comments=comments, 5958 this=this, 5959 alias=self._parse_id_var(False), 5960 over=over, 5961 ) 5962 5963 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5964 5965 first = self._match(TokenType.FIRST) 5966 if self._match_text_seq("LAST"): 5967 first = False 5968 5969 partition, order = self._parse_partition_and_order() 5970 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5971 5972 if kind: 5973 self._match(TokenType.BETWEEN) 5974 start = self._parse_window_spec() 5975 self._match(TokenType.AND) 5976 end = self._parse_window_spec() 5977 5978 spec = self.expression( 5979 exp.WindowSpec, 5980 kind=kind, 5981 start=start["value"], 5982 start_side=start["side"], 5983 end=end["value"], 5984 end_side=end["side"], 5985 ) 5986 else: 5987 spec = None 5988 5989 self._match_r_paren() 5990 5991 window = self.expression( 5992 exp.Window, 5993 comments=comments, 
5994 this=this, 5995 partition_by=partition, 5996 order=order, 5997 spec=spec, 5998 alias=window_alias, 5999 over=over, 6000 first=first, 6001 ) 6002 6003 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6004 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6005 return self._parse_window(window, alias=alias) 6006 6007 return window 6008 6009 def _parse_partition_and_order( 6010 self, 6011 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6012 return self._parse_partition_by(), self._parse_order() 6013 6014 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6015 self._match(TokenType.BETWEEN) 6016 6017 return { 6018 "value": ( 6019 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6020 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6021 or self._parse_bitwise() 6022 ), 6023 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6024 } 6025 6026 def _parse_alias( 6027 self, this: t.Optional[exp.Expression], explicit: bool = False 6028 ) -> t.Optional[exp.Expression]: 6029 any_token = self._match(TokenType.ALIAS) 6030 comments = self._prev_comments or [] 6031 6032 if explicit and not any_token: 6033 return this 6034 6035 if self._match(TokenType.L_PAREN): 6036 aliases = self.expression( 6037 exp.Aliases, 6038 comments=comments, 6039 this=this, 6040 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6041 ) 6042 self._match_r_paren(aliases) 6043 return aliases 6044 6045 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6046 self.STRING_ALIASES and self._parse_string_as_identifier() 6047 ) 6048 6049 if alias: 6050 comments.extend(alias.pop_comments()) 6051 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6052 column = this.this 6053 6054 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6055 if not this.comments and column and column.comments: 6056 this.comments = 
column.pop_comments() 6057 6058 return this 6059 6060 def _parse_id_var( 6061 self, 6062 any_token: bool = True, 6063 tokens: t.Optional[t.Collection[TokenType]] = None, 6064 ) -> t.Optional[exp.Expression]: 6065 expression = self._parse_identifier() 6066 if not expression and ( 6067 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6068 ): 6069 quoted = self._prev.token_type == TokenType.STRING 6070 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6071 6072 return expression 6073 6074 def _parse_string(self) -> t.Optional[exp.Expression]: 6075 if self._match_set(self.STRING_PARSERS): 6076 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6077 return self._parse_placeholder() 6078 6079 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6080 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6081 6082 def _parse_number(self) -> t.Optional[exp.Expression]: 6083 if self._match_set(self.NUMERIC_PARSERS): 6084 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6085 return self._parse_placeholder() 6086 6087 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6088 if self._match(TokenType.IDENTIFIER): 6089 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6090 return self._parse_placeholder() 6091 6092 def _parse_var( 6093 self, 6094 any_token: bool = False, 6095 tokens: t.Optional[t.Collection[TokenType]] = None, 6096 upper: bool = False, 6097 ) -> t.Optional[exp.Expression]: 6098 if ( 6099 (any_token and self._advance_any()) 6100 or self._match(TokenType.VAR) 6101 or (self._match_set(tokens) if tokens else False) 6102 ): 6103 return self.expression( 6104 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6105 ) 6106 return self._parse_placeholder() 6107 6108 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6109 if self._curr and 
(ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6110 self._advance() 6111 return self._prev 6112 return None 6113 6114 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6115 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6116 6117 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6118 return self._parse_primary() or self._parse_var(any_token=True) 6119 6120 def _parse_null(self) -> t.Optional[exp.Expression]: 6121 if self._match_set(self.NULL_TOKENS): 6122 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6123 return self._parse_placeholder() 6124 6125 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6126 if self._match(TokenType.TRUE): 6127 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6128 if self._match(TokenType.FALSE): 6129 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6130 return self._parse_placeholder() 6131 6132 def _parse_star(self) -> t.Optional[exp.Expression]: 6133 if self._match(TokenType.STAR): 6134 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6135 return self._parse_placeholder() 6136 6137 def _parse_parameter(self) -> exp.Parameter: 6138 this = self._parse_identifier() or self._parse_primary_or_var() 6139 return self.expression(exp.Parameter, this=this) 6140 6141 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6142 if self._match_set(self.PLACEHOLDER_PARSERS): 6143 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6144 if placeholder: 6145 return placeholder 6146 self._advance(-1) 6147 return None 6148 6149 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6150 if not self._match_texts(keywords): 6151 return None 6152 if self._match(TokenType.L_PAREN, advance=False): 6153 return self._parse_wrapped_csv(self._parse_expression) 6154 6155 expression = self._parse_expression() 6156 return [expression] if expression else None 

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`; None results are dropped."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments that trailed the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of binary operators drawn from `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized CSV of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized CSV; parens may be omitted if `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Parse `( <parse_method> )`; the parens may be omitted if `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, else an expression possibly combined with set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT that feeds a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] with optional mode lists."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A single mode can be several VAR tokens, e.g. READ ONLY.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional savepoint and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <string or table>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP action within ALTER TABLE, defaulting its kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse DROP PARTITION (...)[, PARTITION (...)] actions."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD (constraints or columns)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ALTER [COLUMN] actions (defaults, nullability, type)."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fallback: [SET DATA] TYPE <dtype> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse ALTER ... DISTSTYLE ALL|EVEN|AUTO or KEY DISTKEY <column>."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse ALTER ... [COMPOUND] SORTKEY (cols) | AUTO | NONE."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... DROP (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse RENAME [COLUMN old TO new | TO <table>] within ALTER TABLE."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the many dialect-specific forms of ALTER TABLE ... SET."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; falls back to a raw Command if unsupported or unconsumed."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only build an AlterTable if every token was consumed; otherwise
            # fall through to the Command fallback below.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] target USING source ON condition WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY SOURCE|TARGET] ... THEN clauses of a MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False otherwise (incl. explicit BY TARGET).
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the dialect's SHOW trie, else a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item: `name [= | TO] value`, or a transaction characteristic."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one item of a SET statement, via trie dispatch or plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; falls back to a raw Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a multi-word option from `options` into an exp.Var.

        `options` maps the first keyword to its allowed continuations; an
        unmatched required continuation raises (or returns None when
        `raise_unmatched` is False).
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement as a raw, unparsed exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split off the leading keyword; the remainder becomes the command body.
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse `<this>(<kind>[(key value, ...)])` into an exp.DictProperty."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse `<this>([MIN x] MAX y)`; MIN defaults to 0 when omitted."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `<this> FOR <expr> IN <iterator> [IF <condition>]`; None if no IN."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string: a HEREDOC_STRING token or a $[tag]$...$[tag]$ span."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            # Tagged form $tag$ ... $tag$: expect the closing $ of the opener.
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk the keyword trie over upcoming tokens to find the longest-match
        parser in `parsers`; retreats and returns None when nothing matches."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(
        self,
        token_type: TokenType,
        advance: bool = True,
        expression: t.Optional[exp.Expression] = None,
    ) -> t.Optional[bool]:
        """Return True and (optionally) advance if the current token is `token_type`;
        comments on the matched token are attached to `expression` if given."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(
        self, types: t.Collection[TokenType], advance: bool = True
    ) -> t.Optional[bool]:
        """Return True and (optionally) advance if the current token's type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        """Return True and (optionally) advance two tokens if the next two tokens
        match `token_type_a` then `token_type_b`."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `(` token; raise a ParseError otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `)` token; raise a ParseError otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(
        self, texts: t.Collection[str], advance: bool = True
    ) -> t.Optional[bool]:
        """Return True and (optionally) advance if the current token's uppercased
        text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts: str, advance: bool = True) -> t.Optional[bool]:
        """Return True if the next tokens spell out `texts` (case-insensitive),
        advancing past them unless `advance` is False; retreats fully on failure."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite column references in a lambda body that name a lambda parameter,
        replacing them with identifiers/dots and applying declared casts."""
        if not node:
            return node

        # Map parameter name -> declared cast target (or False for "no cast").
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    # Replace at the outermost Dot so the whole chain is rewritten.
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE|DATABASE], distinguishing the TRUNCATE() function."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered/opclass expression optionally followed by `WITH <op>`."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse `[=] (<property>, ...)`, with special handling for FORMAT_NAME."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse the option list of a COPY statement into exp.CopyParameter nodes."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse COPY credential clauses (storage integration, credentials,
        encryption, IAM role, region) into an exp.Credentials node."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        """Parse a single file location in a COPY statement."""
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse COPY [INTO] <target> FROM|TO <files> with credentials and parameters;
        falls back to a raw Command on leftover tokens."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY ... FROM, False for COPY ... TO.
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
The Parser consumes a list of tokens produced by the Tokenizer and produces one parsed syntax tree per SQL statement.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    """Configure a fresh parser and reset its internal state.

    Args:
        error_level: How parse errors are reported; defaults to
            ErrorLevel.IMMEDIATE.
        error_message_context: Number of characters of query context captured
            for error messages.
        max_errors: Cap on messages bundled into a raised ParseError (only
            relevant for ErrorLevel.RAISE).
        dialect: The SQL dialect (name, class, or instance) to parse with.
    """
    # Imported locally to avoid a circular import at module load time.
    from sqlglot.dialects import Dialect

    self.dialect = Dialect.get_or_raise(dialect)
    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.max_errors = max_errors
    self.error_message_context = error_message_context
    # Initialize all mutable parsing state (tokens, errors, indices, ...).
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """Parse tokens into one syntax tree per SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    # Dispatch through the class attribute so subclass overrides of
    # _parse_statement are honored.
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.

    Raises:
        TypeError: If no parser is registered for one of the expression types.
        ParseError: If the tokens can't be parsed into any of the given types.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Tag the error with the type that was attempted, for debuggability.
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # Fix: guard against an empty `expression_types` (e.g. None), where
    # `errors` is empty and a bare `errors[-1]` would raise an unrelated
    # IndexError that masks the real problem.
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from (errors[-1] if errors else None)
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
def check_errors(self) -> None:
    """Log or raise any recorded errors, depending on the configured error level."""
    level = self.error_level

    if level == ErrorLevel.WARN:
        # WARN: emit every error through the module logger; never raise.
        for recorded in self.errors:
            logger.error(str(recorded))
        return

    if level == ErrorLevel.RAISE and self.errors:
        # RAISE: bundle up to max_errors messages into a single ParseError.
        message = concat_messages(self.errors, self.max_errors)
        raise ParseError(message, errors=merge_errors(self.errors))
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    # Anchor the error to the most relevant token available.
    token = token or self._curr or self._prev or Token.string("")
    start, end = token.start, token.end + 1

    # Slice the offending text plus surrounding context out of the query.
    before = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    after = self.sql[end : end + self.error_message_context]

    # \033[4m / \033[0m underline the highlighted fragment in terminals.
    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {before}\033[4m{highlight}\033[0m{after}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=before,
        highlight=highlight,
        end_context=after,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    node = exp_class(**kwargs)

    if comments:
        node.add_comments(comments)
    else:
        # No explicit comments: let the parser attach any pending ones.
        self._add_comments(node)

    return self.validate_expression(node)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the
            expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level == ErrorLevel.IGNORE:
        # Validation disabled: hand the expression back untouched.
        return expression

    for message in expression.error_messages(args):
        self.raise_error(message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.