# sqlglot/parser.py
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP node from a flat [key1, value1, key2, value2, ...] list.

    A single star argument (e.g. ``VAR_MAP(*)``) produces a StarMap instead.
    Arguments are consumed in pairs; an odd-length list raises IndexError.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    map_keys: t.List[exp.Expression] = []
    map_values: t.List[exp.Expression] = []
    index = 0
    while index < len(args):
        map_keys.append(args[index])
        map_values.append(args[index + 1])
        index += 2

    return exp.VarMap(
        keys=exp.array(*map_keys, copy=False),
        values=exp.array(*map_values, copy=False),
    )


def build_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a LIKE node: args[1] becomes the subject (`this`), args[0] the pattern.

    An optional third argument is wrapped as an ESCAPE clause around the LIKE.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    if len(args) > 2:
        return exp.Escape(this=like, expression=seq_get(args, 2))
    return like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a range-parser callable that builds `expr_type` with an optional ESCAPE."""

    def _parse(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        return self._parse_escape(
            self.expression(expr_type, this=this, expression=self._parse_bitwise())
        )

    return _parse


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a LOG/LN node, honoring the dialect's argument order and 1-arg default."""
    # Default argument order is base, expression.
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if not value:
        # Single-argument form: some dialects treat LOG(x) as the natural log.
        if dialect.parser_class.LOG_DEFAULTS_TO_LN:
            return exp.Ln(this=base)
        return exp.Log(this=base)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=base, expression=value)
    return exp.Log(this=value, expression=base)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder producing `expr_type` with a dialect-specific JSON path.

    The builder converts the second argument via `dialect.to_json_path`; any
    extra arguments are kept only when `expr_type` is exp.JSONExtract.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


class _Parser(type):
    # Metaclass for Parser: precomputes prefix tries over the space-separated
    # keys of each subclass's SHOW_PARSERS / SET_PARSERS so multi-word SHOW/SET
    # statements can be matched token by token.
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
99 Default: 3 100 """ 101 102 FUNCTIONS: t.Dict[str, t.Callable] = { 103 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 104 "CONCAT": lambda args, dialect: exp.Concat( 105 expressions=args, 106 safe=not dialect.STRICT_STRING_CONCAT, 107 coalesce=dialect.CONCAT_COALESCE, 108 ), 109 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 110 expressions=args, 111 safe=not dialect.STRICT_STRING_CONCAT, 112 coalesce=dialect.CONCAT_COALESCE, 113 ), 114 "DATE_TO_DATE_STR": lambda args: exp.Cast( 115 this=seq_get(args, 0), 116 to=exp.DataType(this=exp.DataType.Type.TEXT), 117 ), 118 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 119 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 120 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 121 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 122 "LIKE": build_like, 123 "LOG": build_logarithm, 124 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 125 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 126 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 127 "TIME_TO_TIME_STR": lambda args: exp.Cast( 128 this=seq_get(args, 0), 129 to=exp.DataType(this=exp.DataType.Type.TEXT), 130 ), 131 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 132 this=exp.Cast( 133 this=seq_get(args, 0), 134 to=exp.DataType(this=exp.DataType.Type.TEXT), 135 ), 136 start=exp.Literal.number(1), 137 length=exp.Literal.number(10), 138 ), 139 "VAR_MAP": build_var_map, 140 } 141 142 NO_PAREN_FUNCTIONS = { 143 TokenType.CURRENT_DATE: exp.CurrentDate, 144 TokenType.CURRENT_DATETIME: exp.CurrentDate, 145 TokenType.CURRENT_TIME: exp.CurrentTime, 146 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 147 TokenType.CURRENT_USER: exp.CurrentUser, 148 } 149 150 STRUCT_TYPE_TOKENS = { 151 TokenType.NESTED, 152 TokenType.OBJECT, 153 
TokenType.STRUCT, 154 } 155 156 NESTED_TYPE_TOKENS = { 157 TokenType.ARRAY, 158 TokenType.LOWCARDINALITY, 159 TokenType.MAP, 160 TokenType.NULLABLE, 161 *STRUCT_TYPE_TOKENS, 162 } 163 164 ENUM_TYPE_TOKENS = { 165 TokenType.ENUM, 166 TokenType.ENUM8, 167 TokenType.ENUM16, 168 } 169 170 AGGREGATE_TYPE_TOKENS = { 171 TokenType.AGGREGATEFUNCTION, 172 TokenType.SIMPLEAGGREGATEFUNCTION, 173 } 174 175 TYPE_TOKENS = { 176 TokenType.BIT, 177 TokenType.BOOLEAN, 178 TokenType.TINYINT, 179 TokenType.UTINYINT, 180 TokenType.SMALLINT, 181 TokenType.USMALLINT, 182 TokenType.INT, 183 TokenType.UINT, 184 TokenType.BIGINT, 185 TokenType.UBIGINT, 186 TokenType.INT128, 187 TokenType.UINT128, 188 TokenType.INT256, 189 TokenType.UINT256, 190 TokenType.MEDIUMINT, 191 TokenType.UMEDIUMINT, 192 TokenType.FIXEDSTRING, 193 TokenType.FLOAT, 194 TokenType.DOUBLE, 195 TokenType.CHAR, 196 TokenType.NCHAR, 197 TokenType.VARCHAR, 198 TokenType.NVARCHAR, 199 TokenType.BPCHAR, 200 TokenType.TEXT, 201 TokenType.MEDIUMTEXT, 202 TokenType.LONGTEXT, 203 TokenType.MEDIUMBLOB, 204 TokenType.LONGBLOB, 205 TokenType.BINARY, 206 TokenType.VARBINARY, 207 TokenType.JSON, 208 TokenType.JSONB, 209 TokenType.INTERVAL, 210 TokenType.TINYBLOB, 211 TokenType.TINYTEXT, 212 TokenType.TIME, 213 TokenType.TIMETZ, 214 TokenType.TIMESTAMP, 215 TokenType.TIMESTAMP_S, 216 TokenType.TIMESTAMP_MS, 217 TokenType.TIMESTAMP_NS, 218 TokenType.TIMESTAMPTZ, 219 TokenType.TIMESTAMPLTZ, 220 TokenType.DATETIME, 221 TokenType.DATETIME64, 222 TokenType.DATE, 223 TokenType.DATE32, 224 TokenType.INT4RANGE, 225 TokenType.INT4MULTIRANGE, 226 TokenType.INT8RANGE, 227 TokenType.INT8MULTIRANGE, 228 TokenType.NUMRANGE, 229 TokenType.NUMMULTIRANGE, 230 TokenType.TSRANGE, 231 TokenType.TSMULTIRANGE, 232 TokenType.TSTZRANGE, 233 TokenType.TSTZMULTIRANGE, 234 TokenType.DATERANGE, 235 TokenType.DATEMULTIRANGE, 236 TokenType.DECIMAL, 237 TokenType.UDECIMAL, 238 TokenType.BIGDECIMAL, 239 TokenType.UUID, 240 TokenType.GEOGRAPHY, 241 TokenType.GEOMETRY, 
242 TokenType.HLLSKETCH, 243 TokenType.HSTORE, 244 TokenType.PSEUDO_TYPE, 245 TokenType.SUPER, 246 TokenType.SERIAL, 247 TokenType.SMALLSERIAL, 248 TokenType.BIGSERIAL, 249 TokenType.XML, 250 TokenType.YEAR, 251 TokenType.UNIQUEIDENTIFIER, 252 TokenType.USERDEFINED, 253 TokenType.MONEY, 254 TokenType.SMALLMONEY, 255 TokenType.ROWVERSION, 256 TokenType.IMAGE, 257 TokenType.VARIANT, 258 TokenType.OBJECT, 259 TokenType.OBJECT_IDENTIFIER, 260 TokenType.INET, 261 TokenType.IPADDRESS, 262 TokenType.IPPREFIX, 263 TokenType.IPV4, 264 TokenType.IPV6, 265 TokenType.UNKNOWN, 266 TokenType.NULL, 267 TokenType.NAME, 268 *ENUM_TYPE_TOKENS, 269 *NESTED_TYPE_TOKENS, 270 *AGGREGATE_TYPE_TOKENS, 271 } 272 273 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 274 TokenType.BIGINT: TokenType.UBIGINT, 275 TokenType.INT: TokenType.UINT, 276 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 277 TokenType.SMALLINT: TokenType.USMALLINT, 278 TokenType.TINYINT: TokenType.UTINYINT, 279 TokenType.DECIMAL: TokenType.UDECIMAL, 280 } 281 282 SUBQUERY_PREDICATES = { 283 TokenType.ANY: exp.Any, 284 TokenType.ALL: exp.All, 285 TokenType.EXISTS: exp.Exists, 286 TokenType.SOME: exp.Any, 287 } 288 289 RESERVED_TOKENS = { 290 *Tokenizer.SINGLE_TOKENS.values(), 291 TokenType.SELECT, 292 } - {TokenType.IDENTIFIER} 293 294 DB_CREATABLES = { 295 TokenType.DATABASE, 296 TokenType.SCHEMA, 297 TokenType.TABLE, 298 TokenType.VIEW, 299 TokenType.MODEL, 300 TokenType.DICTIONARY, 301 TokenType.SEQUENCE, 302 TokenType.STORAGE_INTEGRATION, 303 } 304 305 CREATABLES = { 306 TokenType.COLUMN, 307 TokenType.CONSTRAINT, 308 TokenType.FUNCTION, 309 TokenType.INDEX, 310 TokenType.PROCEDURE, 311 TokenType.FOREIGN_KEY, 312 *DB_CREATABLES, 313 } 314 315 # Tokens that can represent identifiers 316 ID_VAR_TOKENS = { 317 TokenType.VAR, 318 TokenType.ANTI, 319 TokenType.APPLY, 320 TokenType.ASC, 321 TokenType.ASOF, 322 TokenType.AUTO_INCREMENT, 323 TokenType.BEGIN, 324 TokenType.BPCHAR, 325 TokenType.CACHE, 326 TokenType.CASE, 327 TokenType.COLLATE, 
328 TokenType.COMMAND, 329 TokenType.COMMENT, 330 TokenType.COMMIT, 331 TokenType.CONSTRAINT, 332 TokenType.COPY, 333 TokenType.DEFAULT, 334 TokenType.DELETE, 335 TokenType.DESC, 336 TokenType.DESCRIBE, 337 TokenType.DICTIONARY, 338 TokenType.DIV, 339 TokenType.END, 340 TokenType.EXECUTE, 341 TokenType.ESCAPE, 342 TokenType.FALSE, 343 TokenType.FIRST, 344 TokenType.FILTER, 345 TokenType.FINAL, 346 TokenType.FORMAT, 347 TokenType.FULL, 348 TokenType.IDENTIFIER, 349 TokenType.IS, 350 TokenType.ISNULL, 351 TokenType.INTERVAL, 352 TokenType.KEEP, 353 TokenType.KILL, 354 TokenType.LEFT, 355 TokenType.LOAD, 356 TokenType.MERGE, 357 TokenType.NATURAL, 358 TokenType.NEXT, 359 TokenType.OFFSET, 360 TokenType.OPERATOR, 361 TokenType.ORDINALITY, 362 TokenType.OVERLAPS, 363 TokenType.OVERWRITE, 364 TokenType.PARTITION, 365 TokenType.PERCENT, 366 TokenType.PIVOT, 367 TokenType.PRAGMA, 368 TokenType.RANGE, 369 TokenType.RECURSIVE, 370 TokenType.REFERENCES, 371 TokenType.REFRESH, 372 TokenType.REPLACE, 373 TokenType.RIGHT, 374 TokenType.ROW, 375 TokenType.ROWS, 376 TokenType.SEMI, 377 TokenType.SET, 378 TokenType.SETTINGS, 379 TokenType.SHOW, 380 TokenType.TEMPORARY, 381 TokenType.TOP, 382 TokenType.TRUE, 383 TokenType.TRUNCATE, 384 TokenType.UNIQUE, 385 TokenType.UNPIVOT, 386 TokenType.UPDATE, 387 TokenType.USE, 388 TokenType.VOLATILE, 389 TokenType.WINDOW, 390 *CREATABLES, 391 *SUBQUERY_PREDICATES, 392 *TYPE_TOKENS, 393 *NO_PAREN_FUNCTIONS, 394 } 395 396 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 397 398 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 399 TokenType.ANTI, 400 TokenType.APPLY, 401 TokenType.ASOF, 402 TokenType.FULL, 403 TokenType.LEFT, 404 TokenType.LOCK, 405 TokenType.NATURAL, 406 TokenType.OFFSET, 407 TokenType.RIGHT, 408 TokenType.SEMI, 409 TokenType.WINDOW, 410 } 411 412 ALIAS_TOKENS = ID_VAR_TOKENS 413 414 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 415 416 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 417 418 TRIM_TYPES = 
{"LEADING", "TRAILING", "BOTH"} 419 420 FUNC_TOKENS = { 421 TokenType.COLLATE, 422 TokenType.COMMAND, 423 TokenType.CURRENT_DATE, 424 TokenType.CURRENT_DATETIME, 425 TokenType.CURRENT_TIMESTAMP, 426 TokenType.CURRENT_TIME, 427 TokenType.CURRENT_USER, 428 TokenType.FILTER, 429 TokenType.FIRST, 430 TokenType.FORMAT, 431 TokenType.GLOB, 432 TokenType.IDENTIFIER, 433 TokenType.INDEX, 434 TokenType.ISNULL, 435 TokenType.ILIKE, 436 TokenType.INSERT, 437 TokenType.LIKE, 438 TokenType.MERGE, 439 TokenType.OFFSET, 440 TokenType.PRIMARY_KEY, 441 TokenType.RANGE, 442 TokenType.REPLACE, 443 TokenType.RLIKE, 444 TokenType.ROW, 445 TokenType.UNNEST, 446 TokenType.VAR, 447 TokenType.LEFT, 448 TokenType.RIGHT, 449 TokenType.SEQUENCE, 450 TokenType.DATE, 451 TokenType.DATETIME, 452 TokenType.TABLE, 453 TokenType.TIMESTAMP, 454 TokenType.TIMESTAMPTZ, 455 TokenType.TRUNCATE, 456 TokenType.WINDOW, 457 TokenType.XOR, 458 *TYPE_TOKENS, 459 *SUBQUERY_PREDICATES, 460 } 461 462 CONJUNCTION = { 463 TokenType.AND: exp.And, 464 TokenType.OR: exp.Or, 465 } 466 467 EQUALITY = { 468 TokenType.COLON_EQ: exp.PropertyEQ, 469 TokenType.EQ: exp.EQ, 470 TokenType.NEQ: exp.NEQ, 471 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 472 } 473 474 COMPARISON = { 475 TokenType.GT: exp.GT, 476 TokenType.GTE: exp.GTE, 477 TokenType.LT: exp.LT, 478 TokenType.LTE: exp.LTE, 479 } 480 481 BITWISE = { 482 TokenType.AMP: exp.BitwiseAnd, 483 TokenType.CARET: exp.BitwiseXor, 484 TokenType.PIPE: exp.BitwiseOr, 485 } 486 487 TERM = { 488 TokenType.DASH: exp.Sub, 489 TokenType.PLUS: exp.Add, 490 TokenType.MOD: exp.Mod, 491 TokenType.COLLATE: exp.Collate, 492 } 493 494 FACTOR = { 495 TokenType.DIV: exp.IntDiv, 496 TokenType.LR_ARROW: exp.Distance, 497 TokenType.SLASH: exp.Div, 498 TokenType.STAR: exp.Mul, 499 } 500 501 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 502 503 TIMES = { 504 TokenType.TIME, 505 TokenType.TIMETZ, 506 } 507 508 TIMESTAMPS = { 509 TokenType.TIMESTAMP, 510 TokenType.TIMESTAMPTZ, 511 
TokenType.TIMESTAMPLTZ, 512 *TIMES, 513 } 514 515 SET_OPERATIONS = { 516 TokenType.UNION, 517 TokenType.INTERSECT, 518 TokenType.EXCEPT, 519 } 520 521 JOIN_METHODS = { 522 TokenType.ASOF, 523 TokenType.NATURAL, 524 TokenType.POSITIONAL, 525 } 526 527 JOIN_SIDES = { 528 TokenType.LEFT, 529 TokenType.RIGHT, 530 TokenType.FULL, 531 } 532 533 JOIN_KINDS = { 534 TokenType.INNER, 535 TokenType.OUTER, 536 TokenType.CROSS, 537 TokenType.SEMI, 538 TokenType.ANTI, 539 } 540 541 JOIN_HINTS: t.Set[str] = set() 542 543 LAMBDAS = { 544 TokenType.ARROW: lambda self, expressions: self.expression( 545 exp.Lambda, 546 this=self._replace_lambda( 547 self._parse_conjunction(), 548 {node.name for node in expressions}, 549 ), 550 expressions=expressions, 551 ), 552 TokenType.FARROW: lambda self, expressions: self.expression( 553 exp.Kwarg, 554 this=exp.var(expressions[0].name), 555 expression=self._parse_conjunction(), 556 ), 557 } 558 559 COLUMN_OPERATORS = { 560 TokenType.DOT: None, 561 TokenType.DCOLON: lambda self, this, to: self.expression( 562 exp.Cast if self.STRICT_CAST else exp.TryCast, 563 this=this, 564 to=to, 565 ), 566 TokenType.ARROW: lambda self, this, path: self.expression( 567 exp.JSONExtract, 568 this=this, 569 expression=self.dialect.to_json_path(path), 570 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 571 ), 572 TokenType.DARROW: lambda self, this, path: self.expression( 573 exp.JSONExtractScalar, 574 this=this, 575 expression=self.dialect.to_json_path(path), 576 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 577 ), 578 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 579 exp.JSONBExtract, 580 this=this, 581 expression=path, 582 ), 583 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 584 exp.JSONBExtractScalar, 585 this=this, 586 expression=path, 587 ), 588 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 589 exp.JSONBContains, 590 this=this, 591 expression=key, 592 ), 593 } 594 595 EXPRESSION_PARSERS = { 596 
exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 597 exp.Column: lambda self: self._parse_column(), 598 exp.Condition: lambda self: self._parse_conjunction(), 599 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 600 exp.Expression: lambda self: self._parse_expression(), 601 exp.From: lambda self: self._parse_from(), 602 exp.Group: lambda self: self._parse_group(), 603 exp.Having: lambda self: self._parse_having(), 604 exp.Identifier: lambda self: self._parse_id_var(), 605 exp.Join: lambda self: self._parse_join(), 606 exp.Lambda: lambda self: self._parse_lambda(), 607 exp.Lateral: lambda self: self._parse_lateral(), 608 exp.Limit: lambda self: self._parse_limit(), 609 exp.Offset: lambda self: self._parse_offset(), 610 exp.Order: lambda self: self._parse_order(), 611 exp.Ordered: lambda self: self._parse_ordered(), 612 exp.Properties: lambda self: self._parse_properties(), 613 exp.Qualify: lambda self: self._parse_qualify(), 614 exp.Returning: lambda self: self._parse_returning(), 615 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 616 exp.Table: lambda self: self._parse_table_parts(), 617 exp.TableAlias: lambda self: self._parse_table_alias(), 618 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 619 exp.Where: lambda self: self._parse_where(), 620 exp.Window: lambda self: self._parse_named_window(), 621 exp.With: lambda self: self._parse_with(), 622 "JOIN_TYPE": lambda self: self._parse_join_parts(), 623 } 624 625 STATEMENT_PARSERS = { 626 TokenType.ALTER: lambda self: self._parse_alter(), 627 TokenType.BEGIN: lambda self: self._parse_transaction(), 628 TokenType.CACHE: lambda self: self._parse_cache(), 629 TokenType.COMMENT: lambda self: self._parse_comment(), 630 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 631 TokenType.COPY: lambda self: self._parse_copy(), 632 TokenType.CREATE: lambda self: self._parse_create(), 633 TokenType.DELETE: lambda self: 
self._parse_delete(), 634 TokenType.DESC: lambda self: self._parse_describe(), 635 TokenType.DESCRIBE: lambda self: self._parse_describe(), 636 TokenType.DROP: lambda self: self._parse_drop(), 637 TokenType.INSERT: lambda self: self._parse_insert(), 638 TokenType.KILL: lambda self: self._parse_kill(), 639 TokenType.LOAD: lambda self: self._parse_load(), 640 TokenType.MERGE: lambda self: self._parse_merge(), 641 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 642 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 643 TokenType.REFRESH: lambda self: self._parse_refresh(), 644 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 645 TokenType.SET: lambda self: self._parse_set(), 646 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 647 TokenType.UNCACHE: lambda self: self._parse_uncache(), 648 TokenType.UPDATE: lambda self: self._parse_update(), 649 TokenType.USE: lambda self: self.expression( 650 exp.Use, 651 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 652 this=self._parse_table(schema=False), 653 ), 654 } 655 656 UNARY_PARSERS = { 657 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 658 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 659 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 660 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 661 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 662 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 663 } 664 665 STRING_PARSERS = { 666 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 667 exp.RawString, this=token.text 668 ), 669 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 670 exp.National, this=token.text 671 ), 672 TokenType.RAW_STRING: lambda self, token: 
self.expression(exp.RawString, this=token.text), 673 TokenType.STRING: lambda self, token: self.expression( 674 exp.Literal, this=token.text, is_string=True 675 ), 676 TokenType.UNICODE_STRING: lambda self, token: self.expression( 677 exp.UnicodeString, 678 this=token.text, 679 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 680 ), 681 } 682 683 NUMERIC_PARSERS = { 684 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 685 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 686 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 687 TokenType.NUMBER: lambda self, token: self.expression( 688 exp.Literal, this=token.text, is_string=False 689 ), 690 } 691 692 PRIMARY_PARSERS = { 693 **STRING_PARSERS, 694 **NUMERIC_PARSERS, 695 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 696 TokenType.NULL: lambda self, _: self.expression(exp.Null), 697 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 698 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 699 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 700 TokenType.STAR: lambda self, _: self.expression( 701 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 702 ), 703 } 704 705 PLACEHOLDER_PARSERS = { 706 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 707 TokenType.PARAMETER: lambda self: self._parse_parameter(), 708 TokenType.COLON: lambda self: ( 709 self.expression(exp.Placeholder, this=self._prev.text) 710 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 711 else None 712 ), 713 } 714 715 RANGE_PARSERS = { 716 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 717 TokenType.GLOB: binary_range_parser(exp.Glob), 718 TokenType.ILIKE: binary_range_parser(exp.ILike), 719 TokenType.IN: lambda self, this: 
self._parse_in(this), 720 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 721 TokenType.IS: lambda self, this: self._parse_is(this), 722 TokenType.LIKE: binary_range_parser(exp.Like), 723 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 724 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 725 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 726 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 727 } 728 729 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 730 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 731 "AUTO": lambda self: self._parse_auto_property(), 732 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 733 "BACKUP": lambda self: self.expression( 734 exp.BackupProperty, this=self._parse_var(any_token=True) 735 ), 736 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 737 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 738 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 739 "CHECKSUM": lambda self: self._parse_checksum(), 740 "CLUSTER BY": lambda self: self._parse_cluster(), 741 "CLUSTERED": lambda self: self._parse_clustered_by(), 742 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 743 exp.CollateProperty, **kwargs 744 ), 745 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 746 "CONTAINS": lambda self: self._parse_contains_property(), 747 "COPY": lambda self: self._parse_copy_property(), 748 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 749 "DEFINER": lambda self: self._parse_definer(), 750 "DETERMINISTIC": lambda self: self.expression( 751 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 752 ), 753 "DISTKEY": lambda self: self._parse_distkey(), 754 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 755 "ENGINE": lambda self: 
self._parse_property_assignment(exp.EngineProperty), 756 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 757 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 758 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 759 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 760 "FREESPACE": lambda self: self._parse_freespace(), 761 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 762 "HEAP": lambda self: self.expression(exp.HeapProperty), 763 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 764 "IMMUTABLE": lambda self: self.expression( 765 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 766 ), 767 "INHERITS": lambda self: self.expression( 768 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 769 ), 770 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 771 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 772 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 773 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 774 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 775 "LIKE": lambda self: self._parse_create_like(), 776 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 777 "LOCK": lambda self: self._parse_locking(), 778 "LOCKING": lambda self: self._parse_locking(), 779 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 780 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 781 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 782 "MODIFIES": lambda self: self._parse_modifies_property(), 783 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 784 "NO": lambda self: self._parse_no_property(), 785 "ON": lambda self: self._parse_on_property(), 786 "ORDER BY": lambda self: 
self._parse_order(skip_order_token=True), 787 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 788 "PARTITION": lambda self: self._parse_partitioned_of(), 789 "PARTITION BY": lambda self: self._parse_partitioned_by(), 790 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 791 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 792 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 793 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 794 "READS": lambda self: self._parse_reads_property(), 795 "REMOTE": lambda self: self._parse_remote_with_connection(), 796 "RETURNS": lambda self: self._parse_returns(), 797 "ROW": lambda self: self._parse_row(), 798 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 799 "SAMPLE": lambda self: self.expression( 800 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 801 ), 802 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 803 "SETTINGS": lambda self: self.expression( 804 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 805 ), 806 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 807 "SORTKEY": lambda self: self._parse_sortkey(), 808 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 809 "STABLE": lambda self: self.expression( 810 exp.StabilityProperty, this=exp.Literal.string("STABLE") 811 ), 812 "STORED": lambda self: self._parse_stored(), 813 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 814 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 815 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 816 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 817 "TO": lambda self: self._parse_to_table(), 818 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 819 "TRANSFORM": lambda self: self.expression( 820 
exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 821 ), 822 "TTL": lambda self: self._parse_ttl(), 823 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 824 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 825 "VOLATILE": lambda self: self._parse_volatile_property(), 826 "WITH": lambda self: self._parse_with_property(), 827 } 828 829 CONSTRAINT_PARSERS = { 830 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 831 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 832 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 833 "CHARACTER SET": lambda self: self.expression( 834 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 835 ), 836 "CHECK": lambda self: self.expression( 837 exp.CheckColumnConstraint, 838 this=self._parse_wrapped(self._parse_conjunction), 839 enforced=self._match_text_seq("ENFORCED"), 840 ), 841 "COLLATE": lambda self: self.expression( 842 exp.CollateColumnConstraint, this=self._parse_var() 843 ), 844 "COMMENT": lambda self: self.expression( 845 exp.CommentColumnConstraint, this=self._parse_string() 846 ), 847 "COMPRESS": lambda self: self._parse_compress(), 848 "CLUSTERED": lambda self: self.expression( 849 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 850 ), 851 "NONCLUSTERED": lambda self: self.expression( 852 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 853 ), 854 "DEFAULT": lambda self: self.expression( 855 exp.DefaultColumnConstraint, this=self._parse_bitwise() 856 ), 857 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 858 "EPHEMERAL": lambda self: self.expression( 859 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 860 ), 861 "EXCLUDE": lambda self: self.expression( 862 exp.ExcludeColumnConstraint, this=self._parse_index_params() 863 ), 864 "FOREIGN KEY": 
lambda self: self._parse_foreign_key(), 865 "FORMAT": lambda self: self.expression( 866 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 867 ), 868 "GENERATED": lambda self: self._parse_generated_as_identity(), 869 "IDENTITY": lambda self: self._parse_auto_increment(), 870 "INLINE": lambda self: self._parse_inline(), 871 "LIKE": lambda self: self._parse_create_like(), 872 "NOT": lambda self: self._parse_not_constraint(), 873 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 874 "ON": lambda self: ( 875 self._match(TokenType.UPDATE) 876 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 877 ) 878 or self.expression(exp.OnProperty, this=self._parse_id_var()), 879 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 880 "PERIOD": lambda self: self._parse_period_for_system_time(), 881 "PRIMARY KEY": lambda self: self._parse_primary_key(), 882 "REFERENCES": lambda self: self._parse_references(match=False), 883 "TITLE": lambda self: self.expression( 884 exp.TitleColumnConstraint, this=self._parse_var_or_string() 885 ), 886 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 887 "UNIQUE": lambda self: self._parse_unique(), 888 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 889 "WITH": lambda self: self.expression( 890 exp.Properties, expressions=self._parse_wrapped_properties() 891 ), 892 } 893 894 ALTER_PARSERS = { 895 "ADD": lambda self: self._parse_alter_table_add(), 896 "ALTER": lambda self: self._parse_alter_table_alter(), 897 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 898 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 899 "DROP": lambda self: self._parse_alter_table_drop(), 900 "RENAME": lambda self: self._parse_alter_table_rename(), 901 } 902 903 SCHEMA_UNNAMED_CONSTRAINTS = { 904 "CHECK", 905 "EXCLUDE", 906 "FOREIGN KEY", 907 "LIKE", 
908 "PERIOD", 909 "PRIMARY KEY", 910 "UNIQUE", 911 } 912 913 NO_PAREN_FUNCTION_PARSERS = { 914 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 915 "CASE": lambda self: self._parse_case(), 916 "IF": lambda self: self._parse_if(), 917 "NEXT": lambda self: self._parse_next_value_for(), 918 } 919 920 INVALID_FUNC_NAME_TOKENS = { 921 TokenType.IDENTIFIER, 922 TokenType.STRING, 923 } 924 925 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 926 927 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 928 929 FUNCTION_PARSERS = { 930 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 931 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 932 "DECODE": lambda self: self._parse_decode(), 933 "EXTRACT": lambda self: self._parse_extract(), 934 "JSON_OBJECT": lambda self: self._parse_json_object(), 935 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 936 "JSON_TABLE": lambda self: self._parse_json_table(), 937 "MATCH": lambda self: self._parse_match_against(), 938 "OPENJSON": lambda self: self._parse_open_json(), 939 "POSITION": lambda self: self._parse_position(), 940 "PREDICT": lambda self: self._parse_predict(), 941 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 942 "STRING_AGG": lambda self: self._parse_string_agg(), 943 "SUBSTRING": lambda self: self._parse_substring(), 944 "TRIM": lambda self: self._parse_trim(), 945 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 946 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 947 } 948 949 QUERY_MODIFIER_PARSERS = { 950 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 951 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 952 TokenType.WHERE: lambda self: ("where", self._parse_where()), 953 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 954 TokenType.HAVING: lambda self: ("having", self._parse_having()), 955 TokenType.QUALIFY: lambda self: 
    # Parsers for the item following SET, keyed by its leading (scope) keyword.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }
"ISOLATION": ( 998 ("LEVEL", "REPEATABLE", "READ"), 999 ("LEVEL", "READ", "COMMITTED"), 1000 ("LEVEL", "READ", "UNCOMITTED"), 1001 ("LEVEL", "SERIALIZABLE"), 1002 ), 1003 "READ": ("WRITE", "ONLY"), 1004 } 1005 1006 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1007 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1008 ) 1009 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1010 1011 CREATE_SEQUENCE: OPTIONS_TYPE = { 1012 "SCALE": ("EXTEND", "NOEXTEND"), 1013 "SHARD": ("EXTEND", "NOEXTEND"), 1014 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1015 **dict.fromkeys( 1016 ( 1017 "SESSION", 1018 "GLOBAL", 1019 "KEEP", 1020 "NOKEEP", 1021 "ORDER", 1022 "NOORDER", 1023 "NOCACHE", 1024 "CYCLE", 1025 "NOCYCLE", 1026 "NOMINVALUE", 1027 "NOMAXVALUE", 1028 "NOSCALE", 1029 "NOSHARD", 1030 ), 1031 tuple(), 1032 ), 1033 } 1034 1035 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1036 1037 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1038 1039 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1040 1041 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1042 1043 CLONE_KEYWORDS = {"CLONE", "COPY"} 1044 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1045 1046 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1047 1048 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1049 1050 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1051 1052 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1053 1054 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1055 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1056 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1057 1058 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1059 1060 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1061 1062 
    # Tokens that may introduce an ALTER TABLE ADD CONSTRAINT clause.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    # Tokens allowed as the alias in UNNEST ... WITH OFFSET <alias>; set-operation
    # keywords are excluded so they terminate the clause instead.
    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Tokens that can start a SELECT query (possibly parenthesized or CTE-prefixed).
    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}
SELECT 1 FROM y.z AS z, z.a (Redshift) 1109 SUPPORTS_IMPLICIT_UNNEST = False 1110 1111 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1112 INTERVAL_SPANS = True 1113 1114 # Whether a PARTITION clause can follow a table reference 1115 SUPPORTS_PARTITION_SELECTION = False 1116 1117 __slots__ = ( 1118 "error_level", 1119 "error_message_context", 1120 "max_errors", 1121 "dialect", 1122 "sql", 1123 "errors", 1124 "_tokens", 1125 "_index", 1126 "_curr", 1127 "_next", 1128 "_prev", 1129 "_prev_comments", 1130 ) 1131 1132 # Autofilled 1133 SHOW_TRIE: t.Dict = {} 1134 SET_TRIE: t.Dict = {} 1135 1136 def __init__( 1137 self, 1138 error_level: t.Optional[ErrorLevel] = None, 1139 error_message_context: int = 100, 1140 max_errors: int = 3, 1141 dialect: DialectType = None, 1142 ): 1143 from sqlglot.dialects import Dialect 1144 1145 self.error_level = error_level or ErrorLevel.IMMEDIATE 1146 self.error_message_context = error_message_context 1147 self.max_errors = max_errors 1148 self.dialect = Dialect.get_or_raise(dialect) 1149 self.reset() 1150 1151 def reset(self): 1152 self.sql = "" 1153 self.errors = [] 1154 self._tokens = [] 1155 self._index = 0 1156 self._curr = None 1157 self._next = None 1158 self._prev = None 1159 self._prev_comments = None 1160 1161 def parse( 1162 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1163 ) -> t.List[t.Optional[exp.Expression]]: 1164 """ 1165 Parses a list of tokens and returns a list of syntax trees, one tree 1166 per parsed SQL statement. 1167 1168 Args: 1169 raw_tokens: The list of tokens. 1170 sql: The original SQL string, used to produce helpful debug messages. 1171 1172 Returns: 1173 The list of the produced syntax trees. 
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for one of the requested expression types.
            ParseError: If none of the requested expression types could be parsed.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type failed so the aggregate error is actionable
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: The description of the error that was encountered.
            token: The token where the error occurred; defaults to the current, then the
                previous token, then an empty token.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        # Surrounding SQL is included (bounded by error_message_context chars on
        # each side) so the message shows where parsing went wrong
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
1298 kwargs: The arguments to set for the expression along with their respective values. 1299 1300 Returns: 1301 The target expression. 1302 """ 1303 instance = exp_class(**kwargs) 1304 instance.add_comments(comments) if comments else self._add_comments(instance) 1305 return self.validate_expression(instance) 1306 1307 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1308 if expression and self._prev_comments: 1309 expression.add_comments(self._prev_comments) 1310 self._prev_comments = None 1311 1312 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1313 """ 1314 Validates an Expression, making sure that all its mandatory arguments are set. 1315 1316 Args: 1317 expression: The expression to validate. 1318 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1319 1320 Returns: 1321 The validated expression. 1322 """ 1323 if self.error_level != ErrorLevel.IGNORE: 1324 for error_message in expression.error_messages(args): 1325 self.raise_error(error_message) 1326 1327 return expression 1328 1329 def _find_sql(self, start: Token, end: Token) -> str: 1330 return self.sql[start.start : end.end + 1] 1331 1332 def _is_connected(self) -> bool: 1333 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1334 1335 def _advance(self, times: int = 1) -> None: 1336 self._index += times 1337 self._curr = seq_get(self._tokens, self._index) 1338 self._next = seq_get(self._tokens, self._index + 1) 1339 1340 if self._index > 0: 1341 self._prev = self._tokens[self._index - 1] 1342 self._prev_comments = self._prev.comments 1343 else: 1344 self._prev = None 1345 self._prev_comments = None 1346 1347 def _retreat(self, index: int) -> None: 1348 if index != self._index: 1349 self._advance(index - self._index) 1350 1351 def _warn_unsupported(self) -> None: 1352 if len(self._tokens) <= 1: 1353 return 1354 1355 # We use _find_sql because self.sql may comprise multiple chunks, 
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises
        an error. This behavior can be different depending on the user-set ErrorLevel, so
        _try_parse aims to solve this by setting & resetting the parser state accordingly.

        Args:
            parse_method: The zero-argument parse callable to attempt.
            retreat: If True, rewind the token position even when parsing succeeds.

        Returns:
            The parsed result, or None if parsing raised a ParseError.
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so failures surface as exceptions we can catch here
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL expression may be followed by an action: DELETE, RECOMPRESS,
            # TO DISK '<name>' or TO VOLUME '<name>'
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            # GROUP BY ... SET <col> = <agg>, ... rolls up expired rows
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """
        Parse a DROP statement (the DROP token has already been consumed).

        Args:
            exists: Whether an IF EXISTS clause was already consumed by the caller.

        Returns:
            An exp.Drop node, or an exp.Command fallback when the dropped object's
            kind is not a known creatable.
        """
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown object kind - fall back to an opaque command
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        # Optional parenthesized type list (e.g. dropping a function signature)
        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )
    def _parse_create(self) -> exp.Create | exp.Command:
        """
        Parse a CREATE statement (the CREATE/REPLACE token has already been consumed).

        Returns:
            An exp.Create node, or an exp.Command fallback when the statement uses
            syntax this parser doesn't model (or trailing tokens remain).
        """
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION ...: skip the TABLE token
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulate property clauses parsed at the various positions below
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        # Snowflake-style CLONE/COPY of an existing table
        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr:
            # Leftover tokens mean we didn't fully understand the statement
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """
        Parse CREATE SEQUENCE options (INCREMENT BY, MINVALUE, CACHE, OWNED BY, ...).

        Returns:
            An exp.SequenceProperties node, or None if no tokens were consumed.
        """
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                # Bare keyword options (CYCLE, NOCACHE, ...) from CREATE_SEQUENCE
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # If the position didn't move, nothing was actually parsed
        return None if self._index == index else seq
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property clause, trying dialect-specific parsers first, then
        generic `key = value` properties, then sequence options; returns None on no match."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a `key = value` property; backtrack and try sequence options
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )
input_format or output_format 1765 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1766 ), 1767 ) 1768 1769 def _parse_unquoted_field(self): 1770 field = self._parse_field() 1771 if isinstance(field, exp.Identifier) and not field.quoted: 1772 field = exp.var(field) 1773 1774 return field 1775 1776 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1777 self._match(TokenType.EQ) 1778 self._match(TokenType.ALIAS) 1779 1780 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1781 1782 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1783 properties = [] 1784 while True: 1785 if before: 1786 prop = self._parse_property_before() 1787 else: 1788 prop = self._parse_property() 1789 if not prop: 1790 break 1791 for p in ensure_list(prop): 1792 properties.append(p) 1793 1794 if properties: 1795 return self.expression(exp.Properties, expressions=properties) 1796 1797 return None 1798 1799 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1800 return self.expression( 1801 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1802 ) 1803 1804 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1805 if self._index >= 2: 1806 pre_volatile_token = self._tokens[self._index - 2] 1807 else: 1808 pre_volatile_token = None 1809 1810 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1811 return exp.VolatileProperty() 1812 1813 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1814 1815 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1816 self._match_pair(TokenType.EQ, TokenType.ON) 1817 1818 prop = self.expression(exp.WithSystemVersioningProperty) 1819 if self._match(TokenType.L_PAREN): 1820 self._match_text_seq("HISTORY_TABLE", "=") 1821 prop.set("this", self._parse_table_parts()) 
1822 1823 if self._match(TokenType.COMMA): 1824 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1825 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1826 1827 self._match_r_paren() 1828 1829 return prop 1830 1831 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1832 if self._match(TokenType.L_PAREN, advance=False): 1833 return self._parse_wrapped_properties() 1834 1835 if self._match_text_seq("JOURNAL"): 1836 return self._parse_withjournaltable() 1837 1838 if self._match_texts(self.VIEW_ATTRIBUTES): 1839 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1840 1841 if self._match_text_seq("DATA"): 1842 return self._parse_withdata(no=False) 1843 elif self._match_text_seq("NO", "DATA"): 1844 return self._parse_withdata(no=True) 1845 1846 if not self._next: 1847 return None 1848 1849 return self._parse_withisolatedloading() 1850 1851 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1852 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1853 self._match(TokenType.EQ) 1854 1855 user = self._parse_id_var() 1856 self._match(TokenType.PARAMETER) 1857 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1858 1859 if not user or not host: 1860 return None 1861 1862 return exp.DefinerProperty(this=f"{user}@{host}") 1863 1864 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1865 self._match(TokenType.TABLE) 1866 self._match(TokenType.EQ) 1867 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1868 1869 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1870 return self.expression(exp.LogProperty, no=no) 1871 1872 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1873 return self.expression(exp.JournalProperty, **kwargs) 1874 1875 def _parse_checksum(self) -> exp.ChecksumProperty: 1876 self._match(TokenType.EQ) 1877 1878 on = None 1879 if self._match(TokenType.ON): 1880 
    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse a Hive-style CLUSTERED BY (...) [SORTED BY (...)] INTO <n> BUCKETS clause."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        # Optional SORTED BY (<ordered columns>)
        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse ``DATABLOCKSIZE = <n> [BYTES|KBYTES|KILOBYTES]``; flags come from the caller."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse ``BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]``."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse ``[NO] [CONCURRENT] ISOLATED LOADING [<target>]``, rewinding on no match."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: kind, optional table, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a table reference; ROW locking does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse a PARTITION BY expression list; returns [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound: IN (...), FROM (...) TO (...), or WITH (MODULUS m, REMAINDER r)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are kept as bare variables rather than parsed expressions
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",",
"REMAINDER") 2079 expression = self._parse_number() 2080 self._match_r_paren() 2081 else: 2082 self.raise_error("Failed to parse partition bound spec.") 2083 2084 return self.expression( 2085 exp.PartitionBoundSpec, 2086 this=this, 2087 expression=expression, 2088 from_expressions=from_expressions, 2089 to_expressions=to_expressions, 2090 ) 2091 2092 # https://www.postgresql.org/docs/current/sql-createtable.html 2093 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2094 if not self._match_text_seq("OF"): 2095 self._retreat(self._index - 1) 2096 return None 2097 2098 this = self._parse_table(schema=True) 2099 2100 if self._match(TokenType.DEFAULT): 2101 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2102 elif self._match_text_seq("FOR", "VALUES"): 2103 expression = self._parse_partition_bound_spec() 2104 else: 2105 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2106 2107 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2108 2109 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2110 self._match(TokenType.EQ) 2111 return self.expression( 2112 exp.PartitionedByProperty, 2113 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2114 ) 2115 2116 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2117 if self._match_text_seq("AND", "STATISTICS"): 2118 statistics = True 2119 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2120 statistics = False 2121 else: 2122 statistics = None 2123 2124 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2125 2126 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2127 if self._match_text_seq("SQL"): 2128 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2129 return None 2130 2131 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2132 if self._match_text_seq("SQL", "DATA"): 2133 return 
self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2134 return None 2135 2136 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2137 if self._match_text_seq("PRIMARY", "INDEX"): 2138 return exp.NoPrimaryIndexProperty() 2139 if self._match_text_seq("SQL"): 2140 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2141 return None 2142 2143 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2144 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2145 return exp.OnCommitProperty() 2146 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2147 return exp.OnCommitProperty(delete=True) 2148 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2149 2150 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2151 if self._match_text_seq("SQL", "DATA"): 2152 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2153 return None 2154 2155 def _parse_distkey(self) -> exp.DistKeyProperty: 2156 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2157 2158 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2159 table = self._parse_table(schema=True) 2160 2161 options = [] 2162 while self._match_texts(("INCLUDING", "EXCLUDING")): 2163 this = self._prev.text.upper() 2164 2165 id_var = self._parse_id_var() 2166 if not id_var: 2167 return None 2168 2169 options.append( 2170 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2171 ) 2172 2173 return self.expression(exp.LikeProperty, this=table, expressions=options) 2174 2175 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2176 return self.expression( 2177 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2178 ) 2179 2180 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2181 self._match(TokenType.EQ) 2182 return self.expression( 2183 exp.CharacterSetProperty, 
            this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse ``REMOTE WITH CONNECTION <table parts>``."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: TABLE<...>, TABLE schema, or a plain type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE: optional creatable kind, style keyword, target table, properties."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # A following dot means the "style" word was really a db/catalog part; rewind.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (incl. OVERWRITE/IGNORE, DIRECTORY targets, ON CONFLICT)."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # e.g. INSERT OR REPLACE — alternative conflict resolution keyword
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear either before or after the source expression
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse ``KILL [CONNECTION|QUERY] <id>``."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ``ON CONFLICT ...`` / ``ON DUPLICATE KEY ...`` conflict clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        # DO UPDATE — an optional SET precedes the assignment list
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse ``RETURNING <exprs> [INTO <target>]``."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # ROW was already consumed; FORMAT must follow.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ``ROW FORMAT SERDE ...`` or ``ROW FORMAT DELIMITED ...`` clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse ``LOAD DATA [LOCAL] INPATH ...``; anything else falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement, including the multiple-table form."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear either before or after WHERE
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement: target table, SET list, FROM/WHERE/ORDER/LIMIT."""
        comments = self._prev_comments
        this = self._parse_table(joins=True,
                                      alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                # RETURNING may appear either before or after WHERE
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse ``UNCACHE TABLE [IF EXISTS] <table>``."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse ``CACHE [LAZY] TABLE <t> [OPTIONS('k' = 'v')] [AS] <select>``."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            # A single key/value pair is stored as a flat [k, v] list
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse ``PARTITION (<exprs>)``."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row: a parenthesized tuple or a single bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a query: WITH-prefixed statement, SELECT, parenthesized subquery, or VALUES."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: alias [AS] [[NOT] MATERIALIZED] (statement)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT",
"MATERIALIZED"): 2614 materialized = False 2615 elif self._match_text_seq("MATERIALIZED"): 2616 materialized = True 2617 else: 2618 materialized = None 2619 2620 return self.expression( 2621 exp.CTE, 2622 this=self._parse_wrapped(self._parse_statement), 2623 alias=alias, 2624 materialized=materialized, 2625 ) 2626 2627 def _parse_table_alias( 2628 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2629 ) -> t.Optional[exp.TableAlias]: 2630 any_token = self._match(TokenType.ALIAS) 2631 alias = ( 2632 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2633 or self._parse_string_as_identifier() 2634 ) 2635 2636 index = self._index 2637 if self._match(TokenType.L_PAREN): 2638 columns = self._parse_csv(self._parse_function_parameter) 2639 self._match_r_paren() if columns else self._retreat(index) 2640 else: 2641 columns = None 2642 2643 if not alias and not columns: 2644 return None 2645 2646 return self.expression(exp.TableAlias, this=alias, columns=columns) 2647 2648 def _parse_subquery( 2649 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2650 ) -> t.Optional[exp.Subquery]: 2651 if not this: 2652 return None 2653 2654 return self.expression( 2655 exp.Subquery, 2656 this=this, 2657 pivots=self._parse_pivots(), 2658 alias=self._parse_table_alias() if parse_alias else None, 2659 ) 2660 2661 def _implicit_unnests_to_explicit(self, this: E) -> E: 2662 from sqlglot.optimizer.normalize_identifiers import ( 2663 normalize_identifiers as _norm, 2664 ) 2665 2666 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2667 for i, join in enumerate(this.args.get("joins") or []): 2668 table = join.this 2669 normalized_table = table.copy() 2670 normalized_table.meta["maybe_column"] = True 2671 normalized_table = _norm(normalized_table, dialect=self.dialect) 2672 2673 if isinstance(table, exp.Table) and not join.args.get("on"): 2674 if normalized_table.parts[0].name in refs: 2675 
table_as_column = table.to_column() 2676 unnest = exp.Unnest(expressions=[table_as_column]) 2677 2678 # Table.to_column creates a parent Alias node that we want to convert to 2679 # a TableAlias and attach to the Unnest, so it matches the parser's output 2680 if isinstance(table.args.get("alias"), exp.TableAlias): 2681 table_as_column.replace(table_as_column.this) 2682 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2683 2684 table.replace(unnest) 2685 2686 refs.add(normalized_table.alias_or_name) 2687 2688 return this 2689 2690 def _parse_query_modifiers( 2691 self, this: t.Optional[exp.Expression] 2692 ) -> t.Optional[exp.Expression]: 2693 if isinstance(this, (exp.Query, exp.Table)): 2694 for join in self._parse_joins(): 2695 this.append("joins", join) 2696 for lateral in iter(self._parse_lateral, None): 2697 this.append("laterals", lateral) 2698 2699 while True: 2700 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2701 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2702 key, expression = parser(self) 2703 2704 if expression: 2705 this.set(key, expression) 2706 if key == "limit": 2707 offset = expression.args.pop("offset", None) 2708 2709 if offset: 2710 offset = exp.Offset(expression=offset) 2711 this.set("offset", offset) 2712 2713 limit_by_expressions = expression.expressions 2714 expression.set("expressions", None) 2715 offset.set("expressions", limit_by_expressions) 2716 continue 2717 break 2718 2719 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2720 this = self._implicit_unnests_to_explicit(this) 2721 2722 return this 2723 2724 def _parse_hint(self) -> t.Optional[exp.Hint]: 2725 if self._match(TokenType.HINT): 2726 hints = [] 2727 for hint in iter( 2728 lambda: self._parse_csv( 2729 lambda: self._parse_function() or self._parse_var(upper=True) 2730 ), 2731 [], 2732 ): 2733 hints.extend(hint) 2734 2735 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2736 
self.raise_error("Expected */ after HINT") 2737 2738 return self.expression(exp.Hint, expressions=hints) 2739 2740 return None 2741 2742 def _parse_into(self) -> t.Optional[exp.Into]: 2743 if not self._match(TokenType.INTO): 2744 return None 2745 2746 temp = self._match(TokenType.TEMPORARY) 2747 unlogged = self._match_text_seq("UNLOGGED") 2748 self._match(TokenType.TABLE) 2749 2750 return self.expression( 2751 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2752 ) 2753 2754 def _parse_from( 2755 self, joins: bool = False, skip_from_token: bool = False 2756 ) -> t.Optional[exp.From]: 2757 if not skip_from_token and not self._match(TokenType.FROM): 2758 return None 2759 2760 return self.expression( 2761 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2762 ) 2763 2764 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2765 return self.expression( 2766 exp.MatchRecognizeMeasure, 2767 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2768 this=self._parse_expression(), 2769 ) 2770 2771 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2772 if not self._match(TokenType.MATCH_RECOGNIZE): 2773 return None 2774 2775 self._match_l_paren() 2776 2777 partition = self._parse_partition_by() 2778 order = self._parse_order() 2779 2780 measures = ( 2781 self._parse_csv(self._parse_match_recognize_measure) 2782 if self._match_text_seq("MEASURES") 2783 else None 2784 ) 2785 2786 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2787 rows = exp.var("ONE ROW PER MATCH") 2788 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2789 text = "ALL ROWS PER MATCH" 2790 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2791 text += " SHOW EMPTY MATCHES" 2792 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2793 text += " OMIT EMPTY MATCHES" 2794 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2795 text += " WITH UNMATCHED ROWS" 2796 
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan the raw token stream, balancing parens, to capture the pattern verbatim
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY; cross_apply is True/False/None resp."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: an unnest, function call, or (dotted) identifier
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume and return the optional (method, side, kind) tokens of a join."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join (comma, JOIN, or APPLY form) with its ON/USING condition."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join — rewind and drop what we tentatively consumed
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            # The condition may follow a chain of nested joins; try that before giving up
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class name."""
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the parameter clauses of a CREATE INDEX statement (USING, columns,
        INCLUDE, PARTITION BY, WITH storage options, TABLESPACE, WHERE)."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        When `index` is given, the index name was already consumed and we parse
        the `ON <table>` part; otherwise the [UNIQUE|PRIMARY|AMP] INDEX <name>
        prefix is parsed here.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints, if present."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    # e.g. USE INDEX (...) FOR JOIN / ORDER BY / GROUP BY
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single dotted component of a table reference (function, identifier,
        quoted string, or placeholder). Functions are not allowed in schema position."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a possibly qualified table name ([catalog.][db.]table) into exp.Table.

        Raises a parse error if no table name is found (or no db name, when
        `is_db_reference` is set).
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift previously-parsed parts left: db -> catalog, table -> db
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            # Support trailing-star table patterns, e.g. BigQuery `dataset.tbl*`
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if
isinstance(table, exp.Expression) else None

        if is_db_reference:
            # Only a database is being referenced, so shift parts accordingly
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
            pivots=self._parse_pivots(),
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a plain
        table reference with its optional version, alias, hints, pivots, sample
        and joins."""
        # Each alternative below is tried in order; the first match wins
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if
schema: 3191 return self._parse_schema(this=this) 3192 3193 version = self._parse_version() 3194 3195 if version: 3196 this.set("version", version) 3197 3198 if self.dialect.ALIAS_POST_TABLESAMPLE: 3199 table_sample = self._parse_table_sample() 3200 3201 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3202 if alias: 3203 this.set("alias", alias) 3204 3205 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3206 return self.expression( 3207 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3208 ) 3209 3210 this.set("hints", self._parse_table_hints()) 3211 3212 if not this.args.get("pivots"): 3213 this.set("pivots", self._parse_pivots()) 3214 3215 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3216 table_sample = self._parse_table_sample() 3217 3218 if table_sample: 3219 table_sample.set("this", this) 3220 this = table_sample 3221 3222 if joins: 3223 for join in self._parse_joins(): 3224 this.append("joins", join) 3225 3226 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3227 this.set("ordinality", True) 3228 this.set("alias", self._parse_table_alias()) 3229 3230 return this 3231 3232 def _parse_version(self) -> t.Optional[exp.Version]: 3233 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3234 this = "TIMESTAMP" 3235 elif self._match(TokenType.VERSION_SNAPSHOT): 3236 this = "VERSION" 3237 else: 3238 return None 3239 3240 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3241 kind = self._prev.text.upper() 3242 start = self._parse_bitwise() 3243 self._match_texts(("TO", "AND")) 3244 end = self._parse_bitwise() 3245 expression: t.Optional[exp.Expression] = self.expression( 3246 exp.Tuple, expressions=[start, end] 3247 ) 3248 elif self._match_text_seq("CONTAINED", "IN"): 3249 kind = "CONTAINED IN" 3250 expression = self.expression( 3251 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3252 ) 3253 elif self._match(TokenType.ALL): 3254 kind = "ALL" 3255 expression 
= None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) table expression, including its alias and the
        optional WITH ORDINALITY / WITH OFFSET clauses."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # In these dialects the alias names the produced column, not the table
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The extra column alias names the ordinality/offset column
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES (...) derived table, possibly wrapped in parentheses."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse a TABLESAMPLE clause (or DuckDB's USING SAMPLE when `as_modifier`)."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
3311 return None 3312 3313 bucket_numerator = None 3314 bucket_denominator = None 3315 bucket_field = None 3316 percent = None 3317 size = None 3318 seed = None 3319 3320 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3321 matched_l_paren = self._match(TokenType.L_PAREN) 3322 3323 if self.TABLESAMPLE_CSV: 3324 num = None 3325 expressions = self._parse_csv(self._parse_primary) 3326 else: 3327 expressions = None 3328 num = ( 3329 self._parse_factor() 3330 if self._match(TokenType.NUMBER, advance=False) 3331 else self._parse_primary() or self._parse_placeholder() 3332 ) 3333 3334 if self._match_text_seq("BUCKET"): 3335 bucket_numerator = self._parse_number() 3336 self._match_text_seq("OUT", "OF") 3337 bucket_denominator = bucket_denominator = self._parse_number() 3338 self._match(TokenType.ON) 3339 bucket_field = self._parse_field() 3340 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3341 percent = num 3342 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3343 size = num 3344 else: 3345 percent = num 3346 3347 if matched_l_paren: 3348 self._match_r_paren() 3349 3350 if self._match(TokenType.L_PAREN): 3351 method = self._parse_var(upper=True) 3352 seed = self._match(TokenType.COMMA) and self._parse_number() 3353 self._match_r_paren() 3354 elif self._match_texts(("SEED", "REPEATABLE")): 3355 seed = self._parse_wrapped(self._parse_number) 3356 3357 return self.expression( 3358 exp.TableSample, 3359 expressions=expressions, 3360 method=method, 3361 bucket_numerator=bucket_numerator, 3362 bucket_denominator=bucket_denominator, 3363 bucket_field=bucket_field, 3364 percent=percent, 3365 size=size, 3366 seed=seed, 3367 ) 3368 3369 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3370 return list(iter(self._parse_pivot, None)) or None 3371 3372 def _parse_joins(self) -> t.Iterator[exp.Join]: 3373 return iter(self._parse_join, None) 3374 3375 # https://duckdb.org/docs/sql/statements/pivot 3376 def 
_parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement: PIVOT <table> ON ... USING ... GROUP BY ..."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the `<column> IN (<expr> [AS alias], ...)` part of a PIVOT clause."""
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause, including its aggregations, FOR..IN part,
        alias and the generated output column names."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause; rewind past PIVOT/UNPIVOT
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    # Dialect controls whether the aggregation alias prefixes or suffixes
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a ClickHouse PREWHERE clause, if present."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, if present."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self,
skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, collecting expressions, GROUPING SETS,
        ROLLUP/CUBE (keyword or WITH form) and WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # `WITH ROLLUP` stores True; `ROLLUP (...)` stores the column list
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # WITH didn't introduce ROLLUP/CUBE/TOTALS; give it back
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a GROUPING SETS (...) list, if present."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, if present."""
        if not
skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, if present."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse an Oracle hierarchical query clause (START WITH ... CONNECT BY ...)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # PRIOR is only a valid operator inside the CONNECT BY condition, so the
        # parser entry is registered temporarily and removed right after
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        # START WITH may also follow CONNECT BY
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `<name> AS <expr>` (alias first), as used e.g. by INTERPOLATE."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a ClickHouse INTERPOLATE (...) clause, if present."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY (or ORDER SIBLINGS BY) clause; returns `this` unchanged
        when neither is present."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a sort-like clause (e.g. SORT BY / CLUSTER BY) into `exp_class`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ordering term: expression, ASC/DESC, NULLS FIRST/LAST and
        ClickHouse's WITH FILL."""
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        # desc is True (DESC), False (explicit ASC) or falsy-unset (neither given)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Apply the dialect's implicit null-ordering when none was spelled out
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT/TOP clause (or a FETCH FIRST/NEXT clause) attached to `this`."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren =
self._match(TokenType.L_PAREN)
                # TOP (expr) allows an arbitrary term; bare TOP takes only a number
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL `LIMIT offset, count` form
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause attached to `this`; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's `LIMIT ... BY <exprs>` suffix, if present."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse row-locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        with optional OF <tables> and NOWAIT/WAIT/SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif
self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is True (NOWAIT), an expression (WAIT n), False (SKIP LOCKED) or None
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT clauses onto `this`, left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # Absent DISTINCT/ALL defaults to distinct semantics
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (e.g. ORDER BY, LIMIT) from the last
                # select up to the union node, per dialect semantics
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-connected boolean expressions."""
        return
self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse =/<>-level binary expressions."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse </>/<=/>=-level binary expressions."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (IN, BETWEEN, LIKE, IS, ISNULL/NOTNULL, ...)
        with an optional leading NOT."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE."""
        # IS was already consumed; remember its position in case we must rewind
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) ->
exp.In:
        """Parse the right-hand side of an IN predicate: UNNEST(...), a value/query
        list in parens or brackets, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                # IN (SELECT ...) — store as a subquery rather than a value list
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `BETWEEN <low> AND <high>` applied to `this`."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE '<char>' clause when one follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing it to the `INTERVAL '<n>' <unit>` form."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # `interval` was an identifier (e.g. a column named interval), not the keyword
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not
self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                # Split '<n> <unit>' string into a value literal plus a unit var
                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            # e.g. INTERVAL '1-2' YEAR TO MONTH
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level binary operators, including ||, ??, << and >>."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # `a ?? b` null-coalescing operator
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def
_parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level binary operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level binary operators, tagging divisions with
        the dialect's typed/safe division semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level binary operators."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, falling through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-style `<type> <literal>` expression, or a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ...
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single argument of a parameterized type, e.g. the 10 in DECIMAL(10, 2)."""
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            # A bare identifier (e.g. MAX in VARCHAR(MAX)) is a type keyword,
            # not a column reference.
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, returning None (with the cursor restored) on failure.

        Args:
            check_func: when True, require a following string literal before
                accepting forms that could also be function calls (e.g. DATE(...)).
            schema: parsing inside a schema/column-def context; propagated to
                nested type parses.
            allow_identifiers: allow a plain identifier to be re-tokenized and
                interpreted as a (possibly user-defined) type name.
        """
        index = self._index

        # Optional schema-qualified prefix — presumably Teradata's SYSUDTLIB
        # UDT library; TODO(review) confirm.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                # Re-tokenize the identifier text: it may spell a known type.
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Consume a dotted UDT name, e.g. schema.my_type.
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. AggregateFunction(func_name, arg_type, ...): the first
                # element is a function or identifier, the rest are types.
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) could also be a function call of the same name.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracketed nested types, e.g. ARRAY<INT>, STRUCT<a INT>.
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # Optional literal values, e.g. ARRAY<INT>[1, 2].
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    # e.g. INTERVAL DAY TO SECOND
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal: treat TYPE(...) as a function
                # call, not a type.
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Postfix array syntax, e.g. INT[][] -> ARRAY<ARRAY<INT>>.
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one field of a STRUCT type, e.g. `a INT` or `a: INT`."""
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            # _parse_column_def added nothing, so we got a bare name where a
            # type is mandatory — reparse the token as a type instead.
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE <zone> if that clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified, bracketed) column expression."""
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a single column reference, bubbling identifier comments up."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by ( is an ordinary identifier here.
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply postfix column operators (dots, ::, JSON extraction, brackets) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # :: cast — the right-hand side must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: a.b.c becomes catalog=a, db=b, this=c.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, `.N` number, or a
        parenthesized expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. .5 -> 0.5.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                # (a, b) or a trailing comma means a tuple, not a paren expr.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a function call, primary, or identifier.

        When anonymous_func is True, function parsing is tried first so that
        e.g. a keyword-named call isn't swallowed as a primary.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including the ODBC-style {fn <function>} wrapper."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse one function invocation.

        Args:
            functions: name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: always build an exp.Anonymous instead of a typed node.
            optional_parens: allow functions callable without parentheses
                (e.g. CURRENT_DATE).
            any_token: accept any non-reserved token as a function name.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Skip the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Some builders take the dialect; detect by signature.
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling for round-tripping.
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, equalities) into
        exp.PropertyEQ nodes; other expressions pass through untouched."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    # expr AS key  ->  key := expr
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # The key is an identifier, not a column reference.
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function definition."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            # Just a name, no parameter list.
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified by kind
        (e.g. @@GLOBAL.x)."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `x -> x + 1` or `(a, b) -> ...`) or fall back
        to a DISTINCT/select/expression argument with trailing modifiers."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized lambda parameter list; rewind.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow: reparse from scratch as an ordinary argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to `this`."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a field definition inside a schema (name plus optional type/constraints)."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type, computed-expression and constraint list that may
        follow a column name; returns `this` unchanged if none are present."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. ClickHouse's `x ALIAS expr` / `x MATERIALIZED expr`.
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        # Collect any number of trailing column constraints.
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... arguments."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; rewinds and returns None if REFRESH is absent."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | (expr)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ALWAYS AS ROW {START | END} [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr): a computed expression, not identity args.
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Positional form: IDENTITY(start, increment).
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this
    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <expr> (inline length column constraint)."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL, CASESPECIFIC or FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        # A bare CONSTRAINT name with no recognized kind is returned as-is.
        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint, named (CONSTRAINT x ...) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single unnamed constraint whose keyword is in `constraints`
        (defaults to all known constraint keywords)."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(cols)] [USING <index type>] [ON CONFLICT ...]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options (ON <event> <action>,
        DEFERRABLE, MATCH FULL, ...) into a list of raw option strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event (e.g. DELETE/UPDATE) is taken verbatim.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; when match=False the keyword is assumed
        to have been consumed already."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE <action>]*."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-word action, e.g. CASCADE / RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )
    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start_col, end_col); rewinds on mismatch."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, as either a column constraint (no column list)
        or a table constraint with a wrapped column list and options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one element inside brackets/braces: an (optionally aliased,
        optionally sliced) expression."""
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a trailing [...] subscript/array or {...} struct literal on `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize the index by the dialect's base offset.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a slice continuation (`lo : hi`) if a colon follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # The ELSE branch may have consumed END as an interval unit
            # (e.g. a column literally named "end"); recover it as a column.
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as a function call IF(...) or the statement-like
        IF <cond> THEN <expr> [ELSE <expr>] END form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is a command in this dialect (e.g. IF EXISTS ...).
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; rewinds if
        VALUE FOR does not follow."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>) — comma-separated form also accepted."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]).

        Args:
            strict: build exp.Cast (errors on failure) vs exp.TryCast.
            safe: dialect-level "safe cast" flag carried onto the node.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Temporal cast with FORMAT becomes STR_TO_DATE/STR_TO_TIME with
                # the format translated through the dialect's time mapping.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
            exp.DataType.TEMPORAL_TYPES:
                # CAST(x AS DATE/TIME ... FORMAT 'fmt') is normalized into the
                # equivalent string-to-date/time conversion with a mapped format
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style arguments into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT arguments: `expr USING charset` or `expr, type`."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # DECODE(bin, charset) variant
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search values need an explicit IS NULL test
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: match on equality, or when both the
                # operand and the search value are NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # A trailing odd argument is the default branch
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse `[KEY] <key> <sep> [VALUE] <value>` as used inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.FormatJson when followed by the FORMAT JSON keywords."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments and trailing modifiers
        (NULL/ABSENT ON NULL, WITH/WITHOUT UNIQUE KEYS, RETURNING, ENCODING)."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse the `COLUMNS (...)` schema clause of JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE arguments: source, path, ON ERROR/EMPTY handling, schema."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse `MATCH (<cols>) AGAINST (<expr> [modifier])` into exp.MatchAgainst."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) clause: name, type, [path], [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style args; `haystack_first` flips comma-arg order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT arguments: `MODEL <table>, TABLE <table> [, <params>]`."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join-hint function call into exp.JoinHint."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            args.append(self._parse_bitwise())

        return
        self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # In `TRIM(chars FROM string)` the trim characters come first,
            # so swap operands (also when the dialect sets TRIM_PATTERN_FIRST)
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause as a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `<name> AS (<window spec>)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` when followed by IGNORE NULLS / RESPECT NULLS."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing `HAVING MAX <col>` / `HAVING MIN <col>` qualifier."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes of `this`: FILTER (...), WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...).

        With `alias=True`, parses a named window definition (`name AS (...)`) instead.
        """
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper outside of the aggregate
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # `OVER <window name>` form — references a named window
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias after `this`; `explicit=True` requires the AS keyword."""
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into exp.Identifier."""
        expression =
        self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal via the registered STRING_PARSERS."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal, returning it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal via the registered NUMERIC_PARSERS."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token into exp.Identifier."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a variable/keyword token into exp.Var (optionally uppercased)."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Advance past the current token unless it's reserved; return the consumed token."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a variable, falling back to a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression, falling back to a variable token."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a `{name[:value]}`-style parameter (braces matched optionally)."""
        self._match(TokenType.L_BRACE)
        this = self._parse_identifier() or self._parse_primary_or_var()
        expression = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token via PLACEHOLDER_PARSERS, rewinding on failure."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a star-modifier EXCEPT column list, wrapped or bare."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a star-modifier REPLACE expression list, wrapped or bare."""
        if
        not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments preceding the separator to the prior item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-associatively fold binary operators from `expressions` over operands
        produced by `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list; parens optional if `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; error if missing and not `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated expression list."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or else a (set-operation-capable) scalar expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START TRANSACTION with optional comma-separated modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional TO SAVEPOINT and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <string or table>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse `ADD [COLUMN] [IF NOT EXISTS] <field def>` inside ALTER TABLE."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression =
        self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP action inside ALTER TABLE, defaulting its kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD action(s) of ALTER TABLE: constraints or column definitions."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            # A single leading ADD may cover multiple column definitions
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT | COMMENT | [SET DATA] TYPE}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action(s) of ALTER TABLE: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse RENAME inside ALTER TABLE: `COLUMN <old> TO <new>` or `TO <table>`."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE via ALTER_PARSERS; unsupported forms become exp.Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions =
            ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only build an AlterTable node when all tokens were consumed;
            # otherwise fall through and keep the statement as a raw command
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the `WHEN [NOT] MATCHED [BY SOURCE|TARGET] [AND ...] THEN ...` clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT *
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE *
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via dialect SHOW_PARSERS, else keep it as a command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` (or `name TO value`) item of a SET statement."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via dialect SET_PARSERS, falling back to assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; leftover tokens downgrade it to a raw command."""
        index =
self._index 5991 set_ = self.expression( 5992 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5993 ) 5994 5995 if self._curr: 5996 self._retreat(index) 5997 return self._parse_as_command(self._prev) 5998 5999 return set_ 6000 6001 def _parse_var_from_options( 6002 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6003 ) -> t.Optional[exp.Var]: 6004 start = self._curr 6005 if not start: 6006 return None 6007 6008 option = start.text.upper() 6009 continuations = options.get(option) 6010 6011 index = self._index 6012 self._advance() 6013 for keywords in continuations or []: 6014 if isinstance(keywords, str): 6015 keywords = (keywords,) 6016 6017 if self._match_text_seq(*keywords): 6018 option = f"{option} {' '.join(keywords)}" 6019 break 6020 else: 6021 if continuations or continuations is None: 6022 if raise_unmatched: 6023 self.raise_error(f"Unknown option {option}") 6024 6025 self._retreat(index) 6026 return None 6027 6028 return exp.var(option) 6029 6030 def _parse_as_command(self, start: Token) -> exp.Command: 6031 while self._curr: 6032 self._advance() 6033 text = self._find_sql(start, self._prev) 6034 size = len(start.text) 6035 self._warn_unsupported() 6036 return exp.Command(this=text[:size], expression=text[size:]) 6037 6038 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6039 settings = [] 6040 6041 self._match_l_paren() 6042 kind = self._parse_id_var() 6043 6044 if self._match(TokenType.L_PAREN): 6045 while True: 6046 key = self._parse_id_var() 6047 value = self._parse_primary() 6048 6049 if not key and value is None: 6050 break 6051 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6052 self._match(TokenType.R_PAREN) 6053 6054 self._match_r_paren() 6055 6056 return self.expression( 6057 exp.DictProperty, 6058 this=this, 6059 kind=kind.this if kind else None, 6060 settings=settings, 6061 ) 6062 6063 def _parse_dict_range(self, this: str) -> exp.DictRange: 6064 
self._match_l_paren() 6065 has_min = self._match_text_seq("MIN") 6066 if has_min: 6067 min = self._parse_var() or self._parse_primary() 6068 self._match_text_seq("MAX") 6069 max = self._parse_var() or self._parse_primary() 6070 else: 6071 max = self._parse_var() or self._parse_primary() 6072 min = exp.Literal.number(0) 6073 self._match_r_paren() 6074 return self.expression(exp.DictRange, this=this, min=min, max=max) 6075 6076 def _parse_comprehension( 6077 self, this: t.Optional[exp.Expression] 6078 ) -> t.Optional[exp.Comprehension]: 6079 index = self._index 6080 expression = self._parse_column() 6081 if not self._match(TokenType.IN): 6082 self._retreat(index - 1) 6083 return None 6084 iterator = self._parse_column() 6085 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6086 return self.expression( 6087 exp.Comprehension, 6088 this=this, 6089 expression=expression, 6090 iterator=iterator, 6091 condition=condition, 6092 ) 6093 6094 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6095 if self._match(TokenType.HEREDOC_STRING): 6096 return self.expression(exp.Heredoc, this=self._prev.text) 6097 6098 if not self._match_text_seq("$"): 6099 return None 6100 6101 tags = ["$"] 6102 tag_text = None 6103 6104 if self._is_connected(): 6105 self._advance() 6106 tags.append(self._prev.text.upper()) 6107 else: 6108 self.raise_error("No closing $ found") 6109 6110 if tags[-1] != "$": 6111 if self._is_connected() and self._match_text_seq("$"): 6112 tag_text = tags[-1] 6113 tags.append("$") 6114 else: 6115 self.raise_error("No closing $ found") 6116 6117 heredoc_start = self._curr 6118 6119 while self._curr: 6120 if self._match_text_seq(*tags, advance=False): 6121 this = self._find_sql(heredoc_start, self._prev) 6122 self._advance(len(tags)) 6123 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6124 6125 self._advance() 6126 6127 self.raise_error(f"No closing {''.join(tags)} found") 6128 return None 6129 6130 def _find_parser( 6131 
self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6132 ) -> t.Optional[t.Callable]: 6133 if not self._curr: 6134 return None 6135 6136 index = self._index 6137 this = [] 6138 while True: 6139 # The current token might be multiple words 6140 curr = self._curr.text.upper() 6141 key = curr.split(" ") 6142 this.append(curr) 6143 6144 self._advance() 6145 result, trie = in_trie(trie, key) 6146 if result == TrieResult.FAILED: 6147 break 6148 6149 if result == TrieResult.EXISTS: 6150 subparser = parsers[" ".join(this)] 6151 return subparser 6152 6153 self._retreat(index) 6154 return None 6155 6156 def _match(self, token_type, advance=True, expression=None): 6157 if not self._curr: 6158 return None 6159 6160 if self._curr.token_type == token_type: 6161 if advance: 6162 self._advance() 6163 self._add_comments(expression) 6164 return True 6165 6166 return None 6167 6168 def _match_set(self, types, advance=True): 6169 if not self._curr: 6170 return None 6171 6172 if self._curr.token_type in types: 6173 if advance: 6174 self._advance() 6175 return True 6176 6177 return None 6178 6179 def _match_pair(self, token_type_a, token_type_b, advance=True): 6180 if not self._curr or not self._next: 6181 return None 6182 6183 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6184 if advance: 6185 self._advance(2) 6186 return True 6187 6188 return None 6189 6190 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6191 if not self._match(TokenType.L_PAREN, expression=expression): 6192 self.raise_error("Expecting (") 6193 6194 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6195 if not self._match(TokenType.R_PAREN, expression=expression): 6196 self.raise_error("Expecting )") 6197 6198 def _match_texts(self, texts, advance=True): 6199 if self._curr and self._curr.text.upper() in texts: 6200 if advance: 6201 self._advance() 6202 return True 6203 return None 6204 6205 def _match_text_seq(self, 
*texts, advance=True): 6206 index = self._index 6207 for text in texts: 6208 if self._curr and self._curr.text.upper() == text: 6209 self._advance() 6210 else: 6211 self._retreat(index) 6212 return None 6213 6214 if not advance: 6215 self._retreat(index) 6216 6217 return True 6218 6219 def _replace_lambda( 6220 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6221 ) -> t.Optional[exp.Expression]: 6222 if not node: 6223 return node 6224 6225 for column in node.find_all(exp.Column): 6226 if column.parts[0].name in lambda_variables: 6227 dot_or_id = column.to_dot() if column.table else column.this 6228 parent = column.parent 6229 6230 while isinstance(parent, exp.Dot): 6231 if not isinstance(parent.parent, exp.Dot): 6232 parent.replace(dot_or_id) 6233 break 6234 parent = parent.parent 6235 else: 6236 if column is node: 6237 node = dot_or_id 6238 else: 6239 column.replace(dot_or_id) 6240 return node 6241 6242 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6243 start = self._prev 6244 6245 # Not to be confused with TRUNCATE(number, decimals) function call 6246 if self._match(TokenType.L_PAREN): 6247 self._retreat(self._index - 2) 6248 return self._parse_function() 6249 6250 # Clickhouse supports TRUNCATE DATABASE as well 6251 is_database = self._match(TokenType.DATABASE) 6252 6253 self._match(TokenType.TABLE) 6254 6255 exists = self._parse_exists(not_=False) 6256 6257 expressions = self._parse_csv( 6258 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6259 ) 6260 6261 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6262 6263 if self._match_text_seq("RESTART", "IDENTITY"): 6264 identity = "RESTART" 6265 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6266 identity = "CONTINUE" 6267 else: 6268 identity = None 6269 6270 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6271 option = self._prev.text 6272 else: 6273 option = None 6274 6275 partition 
= self._parse_partition() 6276 6277 # Fallback case 6278 if self._curr: 6279 return self._parse_as_command(start) 6280 6281 return self.expression( 6282 exp.TruncateTable, 6283 expressions=expressions, 6284 is_database=is_database, 6285 exists=exists, 6286 cluster=cluster, 6287 identity=identity, 6288 option=option, 6289 partition=partition, 6290 ) 6291 6292 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6293 this = self._parse_ordered(self._parse_opclass) 6294 6295 if not self._match(TokenType.WITH): 6296 return this 6297 6298 op = self._parse_var(any_token=True) 6299 6300 return self.expression(exp.WithOperator, this=this, op=op) 6301 6302 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6303 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6304 6305 options = [] 6306 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6307 option = self._parse_unquoted_field() 6308 value = None 6309 # Some options are defined as functions with the values as params 6310 if not isinstance(option, exp.Func): 6311 # Different dialects might separate options and values by white space, "=" and "AS" 6312 self._match(TokenType.EQ) 6313 self._match(TokenType.ALIAS) 6314 value = self._parse_unquoted_field() 6315 6316 param = self.expression(exp.CopyParameter, this=option, expression=value) 6317 options.append(param) 6318 6319 if sep: 6320 self._match(sep) 6321 6322 return options 6323 6324 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6325 def parse_options(): 6326 opts = [] 6327 self._match(TokenType.EQ) 6328 self._match(TokenType.L_PAREN) 6329 while self._curr and not self._match(TokenType.R_PAREN): 6330 opts.append(self._parse_conjunction()) 6331 return opts 6332 6333 expr = self.expression(exp.Credentials) 6334 6335 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6336 expr.set("storage", self._parse_conjunction()) 6337 if self._match_text_seq("CREDENTIALS"): 6338 # Snowflake supports 
CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6339 creds = parse_options() if self._match(TokenType.EQ) else self._parse_field() 6340 expr.set("credentials", creds) 6341 if self._match_text_seq("ENCRYPTION"): 6342 expr.set("encryption", parse_options()) 6343 if self._match_text_seq("IAM_ROLE"): 6344 expr.set("iam_role", self._parse_field()) 6345 if self._match_text_seq("REGION"): 6346 expr.set("region", self._parse_field()) 6347 6348 return expr 6349 6350 def _parse_copy(self): 6351 start = self._prev 6352 6353 self._match(TokenType.INTO) 6354 6355 this = ( 6356 self._parse_conjunction() 6357 if self._match(TokenType.L_PAREN, advance=False) 6358 else self._parse_table(schema=True) 6359 ) 6360 6361 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6362 6363 files = self._parse_csv(self._parse_conjunction) 6364 credentials = self._parse_credentials() 6365 6366 self._match_text_seq("WITH") 6367 6368 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6369 6370 # Fallback case 6371 if self._curr: 6372 return self._parse_as_command(start) 6373 6374 return self.expression( 6375 exp.Copy, 6376 this=this, 6377 kind=kind, 6378 credentials=credentials, 6379 files=files, 6380 params=params, 6381 )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from builder args.

    A single star argument yields an exp.StarMap; otherwise the arguments are
    consumed pairwise as alternating keys and values to build an exp.VarMap.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: t.List = []
    values: t.List = []
    idx = 0
    while idx < len(args):
        keys.append(args[idx])
        values.append(args[idx + 1])
        idx += 2

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a logarithm expression, honoring the dialect's argument order.

    With two arguments, the default order is (base, expression); dialects with
    LOG_BASE_FIRST unset take them reversed. With one argument, dialects whose
    parser sets LOG_DEFAULTS_TO_LN produce Ln instead of Log.
    """
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if value:
        if not dialect.LOG_BASE_FIRST:
            base, value = value, base
        return exp.Log(this=base, expression=value)

    if dialect.parser_class.LOG_DEFAULTS_TO_LN:
        return exp.Ln(this=base)
    return exp.Log(this=base)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder producing expr_type with a dialect-parsed JSON path.

    The builder takes the subject as the first argument and converts the second
    into a JSON path via the dialect. For exp.JSONExtract, any extra arguments
    are preserved under "expressions".
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        return node

    return _builder
88class Parser(metaclass=_Parser): 89 """ 90 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 91 92 Args: 93 error_level: The desired error level. 94 Default: ErrorLevel.IMMEDIATE 95 error_message_context: The amount of context to capture from a query string when displaying 96 the error message (in number of characters). 97 Default: 100 98 max_errors: Maximum number of error messages to include in a raised ParseError. 99 This is only relevant if error_level is ErrorLevel.RAISE. 100 Default: 3 101 """ 102 103 FUNCTIONS: t.Dict[str, t.Callable] = { 104 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 105 "CONCAT": lambda args, dialect: exp.Concat( 106 expressions=args, 107 safe=not dialect.STRICT_STRING_CONCAT, 108 coalesce=dialect.CONCAT_COALESCE, 109 ), 110 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 111 expressions=args, 112 safe=not dialect.STRICT_STRING_CONCAT, 113 coalesce=dialect.CONCAT_COALESCE, 114 ), 115 "DATE_TO_DATE_STR": lambda args: exp.Cast( 116 this=seq_get(args, 0), 117 to=exp.DataType(this=exp.DataType.Type.TEXT), 118 ), 119 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 120 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 121 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 122 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 123 "LIKE": build_like, 124 "LOG": build_logarithm, 125 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 126 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 127 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 128 "TIME_TO_TIME_STR": lambda args: exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 133 this=exp.Cast( 134 this=seq_get(args, 
0), 135 to=exp.DataType(this=exp.DataType.Type.TEXT), 136 ), 137 start=exp.Literal.number(1), 138 length=exp.Literal.number(10), 139 ), 140 "VAR_MAP": build_var_map, 141 } 142 143 NO_PAREN_FUNCTIONS = { 144 TokenType.CURRENT_DATE: exp.CurrentDate, 145 TokenType.CURRENT_DATETIME: exp.CurrentDate, 146 TokenType.CURRENT_TIME: exp.CurrentTime, 147 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 148 TokenType.CURRENT_USER: exp.CurrentUser, 149 } 150 151 STRUCT_TYPE_TOKENS = { 152 TokenType.NESTED, 153 TokenType.OBJECT, 154 TokenType.STRUCT, 155 } 156 157 NESTED_TYPE_TOKENS = { 158 TokenType.ARRAY, 159 TokenType.LOWCARDINALITY, 160 TokenType.MAP, 161 TokenType.NULLABLE, 162 *STRUCT_TYPE_TOKENS, 163 } 164 165 ENUM_TYPE_TOKENS = { 166 TokenType.ENUM, 167 TokenType.ENUM8, 168 TokenType.ENUM16, 169 } 170 171 AGGREGATE_TYPE_TOKENS = { 172 TokenType.AGGREGATEFUNCTION, 173 TokenType.SIMPLEAGGREGATEFUNCTION, 174 } 175 176 TYPE_TOKENS = { 177 TokenType.BIT, 178 TokenType.BOOLEAN, 179 TokenType.TINYINT, 180 TokenType.UTINYINT, 181 TokenType.SMALLINT, 182 TokenType.USMALLINT, 183 TokenType.INT, 184 TokenType.UINT, 185 TokenType.BIGINT, 186 TokenType.UBIGINT, 187 TokenType.INT128, 188 TokenType.UINT128, 189 TokenType.INT256, 190 TokenType.UINT256, 191 TokenType.MEDIUMINT, 192 TokenType.UMEDIUMINT, 193 TokenType.FIXEDSTRING, 194 TokenType.FLOAT, 195 TokenType.DOUBLE, 196 TokenType.CHAR, 197 TokenType.NCHAR, 198 TokenType.VARCHAR, 199 TokenType.NVARCHAR, 200 TokenType.BPCHAR, 201 TokenType.TEXT, 202 TokenType.MEDIUMTEXT, 203 TokenType.LONGTEXT, 204 TokenType.MEDIUMBLOB, 205 TokenType.LONGBLOB, 206 TokenType.BINARY, 207 TokenType.VARBINARY, 208 TokenType.JSON, 209 TokenType.JSONB, 210 TokenType.INTERVAL, 211 TokenType.TINYBLOB, 212 TokenType.TINYTEXT, 213 TokenType.TIME, 214 TokenType.TIMETZ, 215 TokenType.TIMESTAMP, 216 TokenType.TIMESTAMP_S, 217 TokenType.TIMESTAMP_MS, 218 TokenType.TIMESTAMP_NS, 219 TokenType.TIMESTAMPTZ, 220 TokenType.TIMESTAMPLTZ, 221 TokenType.DATETIME, 222 
TokenType.DATETIME64, 223 TokenType.DATE, 224 TokenType.DATE32, 225 TokenType.INT4RANGE, 226 TokenType.INT4MULTIRANGE, 227 TokenType.INT8RANGE, 228 TokenType.INT8MULTIRANGE, 229 TokenType.NUMRANGE, 230 TokenType.NUMMULTIRANGE, 231 TokenType.TSRANGE, 232 TokenType.TSMULTIRANGE, 233 TokenType.TSTZRANGE, 234 TokenType.TSTZMULTIRANGE, 235 TokenType.DATERANGE, 236 TokenType.DATEMULTIRANGE, 237 TokenType.DECIMAL, 238 TokenType.UDECIMAL, 239 TokenType.BIGDECIMAL, 240 TokenType.UUID, 241 TokenType.GEOGRAPHY, 242 TokenType.GEOMETRY, 243 TokenType.HLLSKETCH, 244 TokenType.HSTORE, 245 TokenType.PSEUDO_TYPE, 246 TokenType.SUPER, 247 TokenType.SERIAL, 248 TokenType.SMALLSERIAL, 249 TokenType.BIGSERIAL, 250 TokenType.XML, 251 TokenType.YEAR, 252 TokenType.UNIQUEIDENTIFIER, 253 TokenType.USERDEFINED, 254 TokenType.MONEY, 255 TokenType.SMALLMONEY, 256 TokenType.ROWVERSION, 257 TokenType.IMAGE, 258 TokenType.VARIANT, 259 TokenType.OBJECT, 260 TokenType.OBJECT_IDENTIFIER, 261 TokenType.INET, 262 TokenType.IPADDRESS, 263 TokenType.IPPREFIX, 264 TokenType.IPV4, 265 TokenType.IPV6, 266 TokenType.UNKNOWN, 267 TokenType.NULL, 268 TokenType.NAME, 269 *ENUM_TYPE_TOKENS, 270 *NESTED_TYPE_TOKENS, 271 *AGGREGATE_TYPE_TOKENS, 272 } 273 274 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 275 TokenType.BIGINT: TokenType.UBIGINT, 276 TokenType.INT: TokenType.UINT, 277 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 278 TokenType.SMALLINT: TokenType.USMALLINT, 279 TokenType.TINYINT: TokenType.UTINYINT, 280 TokenType.DECIMAL: TokenType.UDECIMAL, 281 } 282 283 SUBQUERY_PREDICATES = { 284 TokenType.ANY: exp.Any, 285 TokenType.ALL: exp.All, 286 TokenType.EXISTS: exp.Exists, 287 TokenType.SOME: exp.Any, 288 } 289 290 RESERVED_TOKENS = { 291 *Tokenizer.SINGLE_TOKENS.values(), 292 TokenType.SELECT, 293 } - {TokenType.IDENTIFIER} 294 295 DB_CREATABLES = { 296 TokenType.DATABASE, 297 TokenType.SCHEMA, 298 TokenType.TABLE, 299 TokenType.VIEW, 300 TokenType.MODEL, 301 TokenType.DICTIONARY, 302 TokenType.SEQUENCE, 303 
TokenType.STORAGE_INTEGRATION, 304 } 305 306 CREATABLES = { 307 TokenType.COLUMN, 308 TokenType.CONSTRAINT, 309 TokenType.FUNCTION, 310 TokenType.INDEX, 311 TokenType.PROCEDURE, 312 TokenType.FOREIGN_KEY, 313 *DB_CREATABLES, 314 } 315 316 # Tokens that can represent identifiers 317 ID_VAR_TOKENS = { 318 TokenType.VAR, 319 TokenType.ANTI, 320 TokenType.APPLY, 321 TokenType.ASC, 322 TokenType.ASOF, 323 TokenType.AUTO_INCREMENT, 324 TokenType.BEGIN, 325 TokenType.BPCHAR, 326 TokenType.CACHE, 327 TokenType.CASE, 328 TokenType.COLLATE, 329 TokenType.COMMAND, 330 TokenType.COMMENT, 331 TokenType.COMMIT, 332 TokenType.CONSTRAINT, 333 TokenType.COPY, 334 TokenType.DEFAULT, 335 TokenType.DELETE, 336 TokenType.DESC, 337 TokenType.DESCRIBE, 338 TokenType.DICTIONARY, 339 TokenType.DIV, 340 TokenType.END, 341 TokenType.EXECUTE, 342 TokenType.ESCAPE, 343 TokenType.FALSE, 344 TokenType.FIRST, 345 TokenType.FILTER, 346 TokenType.FINAL, 347 TokenType.FORMAT, 348 TokenType.FULL, 349 TokenType.IDENTIFIER, 350 TokenType.IS, 351 TokenType.ISNULL, 352 TokenType.INTERVAL, 353 TokenType.KEEP, 354 TokenType.KILL, 355 TokenType.LEFT, 356 TokenType.LOAD, 357 TokenType.MERGE, 358 TokenType.NATURAL, 359 TokenType.NEXT, 360 TokenType.OFFSET, 361 TokenType.OPERATOR, 362 TokenType.ORDINALITY, 363 TokenType.OVERLAPS, 364 TokenType.OVERWRITE, 365 TokenType.PARTITION, 366 TokenType.PERCENT, 367 TokenType.PIVOT, 368 TokenType.PRAGMA, 369 TokenType.RANGE, 370 TokenType.RECURSIVE, 371 TokenType.REFERENCES, 372 TokenType.REFRESH, 373 TokenType.REPLACE, 374 TokenType.RIGHT, 375 TokenType.ROW, 376 TokenType.ROWS, 377 TokenType.SEMI, 378 TokenType.SET, 379 TokenType.SETTINGS, 380 TokenType.SHOW, 381 TokenType.TEMPORARY, 382 TokenType.TOP, 383 TokenType.TRUE, 384 TokenType.TRUNCATE, 385 TokenType.UNIQUE, 386 TokenType.UNPIVOT, 387 TokenType.UPDATE, 388 TokenType.USE, 389 TokenType.VOLATILE, 390 TokenType.WINDOW, 391 *CREATABLES, 392 *SUBQUERY_PREDICATES, 393 *TYPE_TOKENS, 394 *NO_PAREN_FUNCTIONS, 395 } 396 
397 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 398 399 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 400 TokenType.ANTI, 401 TokenType.APPLY, 402 TokenType.ASOF, 403 TokenType.FULL, 404 TokenType.LEFT, 405 TokenType.LOCK, 406 TokenType.NATURAL, 407 TokenType.OFFSET, 408 TokenType.RIGHT, 409 TokenType.SEMI, 410 TokenType.WINDOW, 411 } 412 413 ALIAS_TOKENS = ID_VAR_TOKENS 414 415 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 416 417 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 418 419 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 420 421 FUNC_TOKENS = { 422 TokenType.COLLATE, 423 TokenType.COMMAND, 424 TokenType.CURRENT_DATE, 425 TokenType.CURRENT_DATETIME, 426 TokenType.CURRENT_TIMESTAMP, 427 TokenType.CURRENT_TIME, 428 TokenType.CURRENT_USER, 429 TokenType.FILTER, 430 TokenType.FIRST, 431 TokenType.FORMAT, 432 TokenType.GLOB, 433 TokenType.IDENTIFIER, 434 TokenType.INDEX, 435 TokenType.ISNULL, 436 TokenType.ILIKE, 437 TokenType.INSERT, 438 TokenType.LIKE, 439 TokenType.MERGE, 440 TokenType.OFFSET, 441 TokenType.PRIMARY_KEY, 442 TokenType.RANGE, 443 TokenType.REPLACE, 444 TokenType.RLIKE, 445 TokenType.ROW, 446 TokenType.UNNEST, 447 TokenType.VAR, 448 TokenType.LEFT, 449 TokenType.RIGHT, 450 TokenType.SEQUENCE, 451 TokenType.DATE, 452 TokenType.DATETIME, 453 TokenType.TABLE, 454 TokenType.TIMESTAMP, 455 TokenType.TIMESTAMPTZ, 456 TokenType.TRUNCATE, 457 TokenType.WINDOW, 458 TokenType.XOR, 459 *TYPE_TOKENS, 460 *SUBQUERY_PREDICATES, 461 } 462 463 CONJUNCTION = { 464 TokenType.AND: exp.And, 465 TokenType.OR: exp.Or, 466 } 467 468 EQUALITY = { 469 TokenType.COLON_EQ: exp.PropertyEQ, 470 TokenType.EQ: exp.EQ, 471 TokenType.NEQ: exp.NEQ, 472 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 473 } 474 475 COMPARISON = { 476 TokenType.GT: exp.GT, 477 TokenType.GTE: exp.GTE, 478 TokenType.LT: exp.LT, 479 TokenType.LTE: exp.LTE, 480 } 481 482 BITWISE = { 483 TokenType.AMP: exp.BitwiseAnd, 484 TokenType.CARET: exp.BitwiseXor, 485 TokenType.PIPE: 
exp.BitwiseOr, 486 } 487 488 TERM = { 489 TokenType.DASH: exp.Sub, 490 TokenType.PLUS: exp.Add, 491 TokenType.MOD: exp.Mod, 492 TokenType.COLLATE: exp.Collate, 493 } 494 495 FACTOR = { 496 TokenType.DIV: exp.IntDiv, 497 TokenType.LR_ARROW: exp.Distance, 498 TokenType.SLASH: exp.Div, 499 TokenType.STAR: exp.Mul, 500 } 501 502 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 503 504 TIMES = { 505 TokenType.TIME, 506 TokenType.TIMETZ, 507 } 508 509 TIMESTAMPS = { 510 TokenType.TIMESTAMP, 511 TokenType.TIMESTAMPTZ, 512 TokenType.TIMESTAMPLTZ, 513 *TIMES, 514 } 515 516 SET_OPERATIONS = { 517 TokenType.UNION, 518 TokenType.INTERSECT, 519 TokenType.EXCEPT, 520 } 521 522 JOIN_METHODS = { 523 TokenType.ASOF, 524 TokenType.NATURAL, 525 TokenType.POSITIONAL, 526 } 527 528 JOIN_SIDES = { 529 TokenType.LEFT, 530 TokenType.RIGHT, 531 TokenType.FULL, 532 } 533 534 JOIN_KINDS = { 535 TokenType.INNER, 536 TokenType.OUTER, 537 TokenType.CROSS, 538 TokenType.SEMI, 539 TokenType.ANTI, 540 } 541 542 JOIN_HINTS: t.Set[str] = set() 543 544 LAMBDAS = { 545 TokenType.ARROW: lambda self, expressions: self.expression( 546 exp.Lambda, 547 this=self._replace_lambda( 548 self._parse_conjunction(), 549 {node.name for node in expressions}, 550 ), 551 expressions=expressions, 552 ), 553 TokenType.FARROW: lambda self, expressions: self.expression( 554 exp.Kwarg, 555 this=exp.var(expressions[0].name), 556 expression=self._parse_conjunction(), 557 ), 558 } 559 560 COLUMN_OPERATORS = { 561 TokenType.DOT: None, 562 TokenType.DCOLON: lambda self, this, to: self.expression( 563 exp.Cast if self.STRICT_CAST else exp.TryCast, 564 this=this, 565 to=to, 566 ), 567 TokenType.ARROW: lambda self, this, path: self.expression( 568 exp.JSONExtract, 569 this=this, 570 expression=self.dialect.to_json_path(path), 571 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 572 ), 573 TokenType.DARROW: lambda self, this, path: self.expression( 574 exp.JSONExtractScalar, 575 this=this, 576 
expression=self.dialect.to_json_path(path), 577 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 578 ), 579 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 580 exp.JSONBExtract, 581 this=this, 582 expression=path, 583 ), 584 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 585 exp.JSONBExtractScalar, 586 this=this, 587 expression=path, 588 ), 589 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 590 exp.JSONBContains, 591 this=this, 592 expression=key, 593 ), 594 } 595 596 EXPRESSION_PARSERS = { 597 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 598 exp.Column: lambda self: self._parse_column(), 599 exp.Condition: lambda self: self._parse_conjunction(), 600 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 601 exp.Expression: lambda self: self._parse_expression(), 602 exp.From: lambda self: self._parse_from(), 603 exp.Group: lambda self: self._parse_group(), 604 exp.Having: lambda self: self._parse_having(), 605 exp.Identifier: lambda self: self._parse_id_var(), 606 exp.Join: lambda self: self._parse_join(), 607 exp.Lambda: lambda self: self._parse_lambda(), 608 exp.Lateral: lambda self: self._parse_lateral(), 609 exp.Limit: lambda self: self._parse_limit(), 610 exp.Offset: lambda self: self._parse_offset(), 611 exp.Order: lambda self: self._parse_order(), 612 exp.Ordered: lambda self: self._parse_ordered(), 613 exp.Properties: lambda self: self._parse_properties(), 614 exp.Qualify: lambda self: self._parse_qualify(), 615 exp.Returning: lambda self: self._parse_returning(), 616 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 617 exp.Table: lambda self: self._parse_table_parts(), 618 exp.TableAlias: lambda self: self._parse_table_alias(), 619 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 620 exp.Where: lambda self: self._parse_where(), 621 exp.Window: lambda self: self._parse_named_window(), 622 exp.With: lambda self: 
self._parse_with(), 623 "JOIN_TYPE": lambda self: self._parse_join_parts(), 624 } 625 626 STATEMENT_PARSERS = { 627 TokenType.ALTER: lambda self: self._parse_alter(), 628 TokenType.BEGIN: lambda self: self._parse_transaction(), 629 TokenType.CACHE: lambda self: self._parse_cache(), 630 TokenType.COMMENT: lambda self: self._parse_comment(), 631 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 632 TokenType.COPY: lambda self: self._parse_copy(), 633 TokenType.CREATE: lambda self: self._parse_create(), 634 TokenType.DELETE: lambda self: self._parse_delete(), 635 TokenType.DESC: lambda self: self._parse_describe(), 636 TokenType.DESCRIBE: lambda self: self._parse_describe(), 637 TokenType.DROP: lambda self: self._parse_drop(), 638 TokenType.INSERT: lambda self: self._parse_insert(), 639 TokenType.KILL: lambda self: self._parse_kill(), 640 TokenType.LOAD: lambda self: self._parse_load(), 641 TokenType.MERGE: lambda self: self._parse_merge(), 642 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 643 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 644 TokenType.REFRESH: lambda self: self._parse_refresh(), 645 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 646 TokenType.SET: lambda self: self._parse_set(), 647 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 648 TokenType.UNCACHE: lambda self: self._parse_uncache(), 649 TokenType.UPDATE: lambda self: self._parse_update(), 650 TokenType.USE: lambda self: self.expression( 651 exp.Use, 652 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 653 this=self._parse_table(schema=False), 654 ), 655 } 656 657 UNARY_PARSERS = { 658 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 659 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 660 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 661 TokenType.DASH: 
lambda self: self.expression(exp.Neg, this=self._parse_unary()), 662 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 663 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 664 } 665 666 STRING_PARSERS = { 667 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 668 exp.RawString, this=token.text 669 ), 670 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 671 exp.National, this=token.text 672 ), 673 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 674 TokenType.STRING: lambda self, token: self.expression( 675 exp.Literal, this=token.text, is_string=True 676 ), 677 TokenType.UNICODE_STRING: lambda self, token: self.expression( 678 exp.UnicodeString, 679 this=token.text, 680 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 681 ), 682 } 683 684 NUMERIC_PARSERS = { 685 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 686 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 687 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 688 TokenType.NUMBER: lambda self, token: self.expression( 689 exp.Literal, this=token.text, is_string=False 690 ), 691 } 692 693 PRIMARY_PARSERS = { 694 **STRING_PARSERS, 695 **NUMERIC_PARSERS, 696 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 697 TokenType.NULL: lambda self, _: self.expression(exp.Null), 698 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 699 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 700 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 701 TokenType.STAR: lambda self, _: self.expression( 702 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 703 ), 704 } 705 706 PLACEHOLDER_PARSERS = { 707 TokenType.PLACEHOLDER: 
lambda self: self.expression(exp.Placeholder), 708 TokenType.PARAMETER: lambda self: self._parse_parameter(), 709 TokenType.COLON: lambda self: ( 710 self.expression(exp.Placeholder, this=self._prev.text) 711 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 712 else None 713 ), 714 } 715 716 RANGE_PARSERS = { 717 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 718 TokenType.GLOB: binary_range_parser(exp.Glob), 719 TokenType.ILIKE: binary_range_parser(exp.ILike), 720 TokenType.IN: lambda self, this: self._parse_in(this), 721 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 722 TokenType.IS: lambda self, this: self._parse_is(this), 723 TokenType.LIKE: binary_range_parser(exp.Like), 724 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 725 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 726 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 727 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 728 } 729 730 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 731 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 732 "AUTO": lambda self: self._parse_auto_property(), 733 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 734 "BACKUP": lambda self: self.expression( 735 exp.BackupProperty, this=self._parse_var(any_token=True) 736 ), 737 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 738 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 739 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 740 "CHECKSUM": lambda self: self._parse_checksum(), 741 "CLUSTER BY": lambda self: self._parse_cluster(), 742 "CLUSTERED": lambda self: self._parse_clustered_by(), 743 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 744 exp.CollateProperty, **kwargs 745 ), 746 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 747 "CONTAINS": 
lambda self: self._parse_contains_property(), 748 "COPY": lambda self: self._parse_copy_property(), 749 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 750 "DEFINER": lambda self: self._parse_definer(), 751 "DETERMINISTIC": lambda self: self.expression( 752 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 753 ), 754 "DISTKEY": lambda self: self._parse_distkey(), 755 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 756 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 757 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 758 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 759 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 760 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 761 "FREESPACE": lambda self: self._parse_freespace(), 762 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 763 "HEAP": lambda self: self.expression(exp.HeapProperty), 764 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 765 "IMMUTABLE": lambda self: self.expression( 766 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 767 ), 768 "INHERITS": lambda self: self.expression( 769 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 770 ), 771 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 772 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 773 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 774 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 775 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 776 "LIKE": lambda self: self._parse_create_like(), 777 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 778 "LOCK": lambda self: self._parse_locking(), 779 "LOCKING": lambda self: self._parse_locking(), 780 
"LOG": lambda self, **kwargs: self._parse_log(**kwargs), 781 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 782 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 783 "MODIFIES": lambda self: self._parse_modifies_property(), 784 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 785 "NO": lambda self: self._parse_no_property(), 786 "ON": lambda self: self._parse_on_property(), 787 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 788 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 789 "PARTITION": lambda self: self._parse_partitioned_of(), 790 "PARTITION BY": lambda self: self._parse_partitioned_by(), 791 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 792 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 793 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 794 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 795 "READS": lambda self: self._parse_reads_property(), 796 "REMOTE": lambda self: self._parse_remote_with_connection(), 797 "RETURNS": lambda self: self._parse_returns(), 798 "ROW": lambda self: self._parse_row(), 799 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 800 "SAMPLE": lambda self: self.expression( 801 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 802 ), 803 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 804 "SETTINGS": lambda self: self.expression( 805 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 806 ), 807 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 808 "SORTKEY": lambda self: self._parse_sortkey(), 809 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 810 "STABLE": lambda self: self.expression( 811 exp.StabilityProperty, this=exp.Literal.string("STABLE") 812 ), 813 "STORED": lambda self: 
self._parse_stored(), 814 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 815 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 816 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 817 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 818 "TO": lambda self: self._parse_to_table(), 819 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 820 "TRANSFORM": lambda self: self.expression( 821 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 822 ), 823 "TTL": lambda self: self._parse_ttl(), 824 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 825 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 826 "VOLATILE": lambda self: self._parse_volatile_property(), 827 "WITH": lambda self: self._parse_with_property(), 828 } 829 830 CONSTRAINT_PARSERS = { 831 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 832 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 833 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 834 "CHARACTER SET": lambda self: self.expression( 835 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 836 ), 837 "CHECK": lambda self: self.expression( 838 exp.CheckColumnConstraint, 839 this=self._parse_wrapped(self._parse_conjunction), 840 enforced=self._match_text_seq("ENFORCED"), 841 ), 842 "COLLATE": lambda self: self.expression( 843 exp.CollateColumnConstraint, this=self._parse_var() 844 ), 845 "COMMENT": lambda self: self.expression( 846 exp.CommentColumnConstraint, this=self._parse_string() 847 ), 848 "COMPRESS": lambda self: self._parse_compress(), 849 "CLUSTERED": lambda self: self.expression( 850 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 851 ), 852 "NONCLUSTERED": lambda self: self.expression( 853 exp.NonClusteredColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_ordered) 854 ), 855 "DEFAULT": lambda self: self.expression( 856 exp.DefaultColumnConstraint, this=self._parse_bitwise() 857 ), 858 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 859 "EPHEMERAL": lambda self: self.expression( 860 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 861 ), 862 "EXCLUDE": lambda self: self.expression( 863 exp.ExcludeColumnConstraint, this=self._parse_index_params() 864 ), 865 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 866 "FORMAT": lambda self: self.expression( 867 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 868 ), 869 "GENERATED": lambda self: self._parse_generated_as_identity(), 870 "IDENTITY": lambda self: self._parse_auto_increment(), 871 "INLINE": lambda self: self._parse_inline(), 872 "LIKE": lambda self: self._parse_create_like(), 873 "NOT": lambda self: self._parse_not_constraint(), 874 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 875 "ON": lambda self: ( 876 self._match(TokenType.UPDATE) 877 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 878 ) 879 or self.expression(exp.OnProperty, this=self._parse_id_var()), 880 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 881 "PERIOD": lambda self: self._parse_period_for_system_time(), 882 "PRIMARY KEY": lambda self: self._parse_primary_key(), 883 "REFERENCES": lambda self: self._parse_references(match=False), 884 "TITLE": lambda self: self.expression( 885 exp.TitleColumnConstraint, this=self._parse_var_or_string() 886 ), 887 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 888 "UNIQUE": lambda self: self._parse_unique(), 889 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 890 "WITH": lambda self: self.expression( 891 exp.Properties, expressions=self._parse_wrapped_properties() 892 ), 
893 } 894 895 ALTER_PARSERS = { 896 "ADD": lambda self: self._parse_alter_table_add(), 897 "ALTER": lambda self: self._parse_alter_table_alter(), 898 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 899 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 900 "DROP": lambda self: self._parse_alter_table_drop(), 901 "RENAME": lambda self: self._parse_alter_table_rename(), 902 } 903 904 SCHEMA_UNNAMED_CONSTRAINTS = { 905 "CHECK", 906 "EXCLUDE", 907 "FOREIGN KEY", 908 "LIKE", 909 "PERIOD", 910 "PRIMARY KEY", 911 "UNIQUE", 912 } 913 914 NO_PAREN_FUNCTION_PARSERS = { 915 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 916 "CASE": lambda self: self._parse_case(), 917 "IF": lambda self: self._parse_if(), 918 "NEXT": lambda self: self._parse_next_value_for(), 919 } 920 921 INVALID_FUNC_NAME_TOKENS = { 922 TokenType.IDENTIFIER, 923 TokenType.STRING, 924 } 925 926 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 927 928 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 929 930 FUNCTION_PARSERS = { 931 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 932 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 933 "DECODE": lambda self: self._parse_decode(), 934 "EXTRACT": lambda self: self._parse_extract(), 935 "JSON_OBJECT": lambda self: self._parse_json_object(), 936 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 937 "JSON_TABLE": lambda self: self._parse_json_table(), 938 "MATCH": lambda self: self._parse_match_against(), 939 "OPENJSON": lambda self: self._parse_open_json(), 940 "POSITION": lambda self: self._parse_position(), 941 "PREDICT": lambda self: self._parse_predict(), 942 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 943 "STRING_AGG": lambda self: self._parse_string_agg(), 944 "SUBSTRING": lambda self: self._parse_substring(), 945 "TRIM": lambda self: self._parse_trim(), 946 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 947 
"TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 948 } 949 950 QUERY_MODIFIER_PARSERS = { 951 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 952 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 953 TokenType.WHERE: lambda self: ("where", self._parse_where()), 954 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 955 TokenType.HAVING: lambda self: ("having", self._parse_having()), 956 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 957 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 958 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 959 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 960 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 961 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 962 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 963 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 964 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 965 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 966 TokenType.CLUSTER_BY: lambda self: ( 967 "cluster", 968 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 969 ), 970 TokenType.DISTRIBUTE_BY: lambda self: ( 971 "distribute", 972 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 973 ), 974 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 975 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 976 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 977 } 978 979 SET_PARSERS = { 980 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 981 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 982 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 983 "TRANSACTION": lambda self: 
self._parse_set_transaction(), 984 } 985 986 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 987 988 TYPE_LITERAL_PARSERS = { 989 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 990 } 991 992 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 993 994 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 995 996 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 997 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 998 "ISOLATION": ( 999 ("LEVEL", "REPEATABLE", "READ"), 1000 ("LEVEL", "READ", "COMMITTED"), 1001 ("LEVEL", "READ", "UNCOMITTED"), 1002 ("LEVEL", "SERIALIZABLE"), 1003 ), 1004 "READ": ("WRITE", "ONLY"), 1005 } 1006 1007 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1008 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1009 ) 1010 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1011 1012 CREATE_SEQUENCE: OPTIONS_TYPE = { 1013 "SCALE": ("EXTEND", "NOEXTEND"), 1014 "SHARD": ("EXTEND", "NOEXTEND"), 1015 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1016 **dict.fromkeys( 1017 ( 1018 "SESSION", 1019 "GLOBAL", 1020 "KEEP", 1021 "NOKEEP", 1022 "ORDER", 1023 "NOORDER", 1024 "NOCACHE", 1025 "CYCLE", 1026 "NOCYCLE", 1027 "NOMINVALUE", 1028 "NOMAXVALUE", 1029 "NOSCALE", 1030 "NOSHARD", 1031 ), 1032 tuple(), 1033 ), 1034 } 1035 1036 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1037 1038 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1039 1040 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1041 1042 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1043 1044 CLONE_KEYWORDS = {"CLONE", "COPY"} 1045 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1046 1047 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1048 1049 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1050 1051 
TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    # Tokens usable as a window alias (ROWS would be ambiguous with the frame clause)
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    # Whether CAST errors on failure (vs returning NULL, as TRY_CAST does)
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: What to do when an error is found; defaults to IMMEDIATE (raise).
            error_message_context: How many characters of surrounding SQL to show in errors.
            max_errors: Maximum number of error messages to include in a raised ParseError.
            dialect: The dialect whose conventions drive parsing.
        """
        # Imported here to avoid a circular import with sqlglot.dialects
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all mutable parsing state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            ParseError: If none of the given expression types could be parsed.
            TypeError: If one of the expression types has no registered parser.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits tokens into per-statement chunks at semicolons, then applies parse_method to each."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't start a new (empty) statement
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any leftover tokens mean the chunk wasn't fully consumed -> invalid input
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # The \033[4m / \033[0m ANSI codes underline the offending SQL snippet
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If no comments are given explicitly, attach any pending comments from the last token
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfers the previous token's comments onto the expression (consume-once semantics)
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the raw SQL slice spanning the given start/end tokens (inclusive)
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent in the source (no whitespace)
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the token cursor forward, refreshing the _curr/_next/_prev lookahead window
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or fast-forwards) the cursor to an absolute token index
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap unsupported syntax in an opaque Command node, warning first
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any parse failure surfaces as a catchable ParseError
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT ON <kind> <name> IS <string>; falls back to a Command for unknown kinds
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action clause
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level entry point: dispatches one statement based on the current token."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # No statement keyword matched: treat it as a bare expression or SELECT
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only when the full sequence matched
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION <name>(...) -- skip the TABLE keyword
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different positions into one Properties node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        # Anything left over means we couldn't fully parse the statement: fall back to Command
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parses CREATE SEQUENCE options; returns None if no option was consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # No token consumed -> there were no sequence properties at all
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1710 "after": self._match_text_seq("AFTER"), 1711 "minimum": self._match_texts(("MIN", "MINIMUM")), 1712 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1713 } 1714 1715 if self._match_texts(self.PROPERTY_PARSERS): 1716 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1717 try: 1718 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1719 except TypeError: 1720 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1721 1722 return None 1723 1724 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1725 return self._parse_wrapped_csv(self._parse_property) 1726 1727 def _parse_property(self) -> t.Optional[exp.Expression]: 1728 if self._match_texts(self.PROPERTY_PARSERS): 1729 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1730 1731 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1732 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1733 1734 if self._match_text_seq("COMPOUND", "SORTKEY"): 1735 return self._parse_sortkey(compound=True) 1736 1737 if self._match_text_seq("SQL", "SECURITY"): 1738 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1739 1740 index = self._index 1741 key = self._parse_column() 1742 1743 if not self._match(TokenType.EQ): 1744 self._retreat(index) 1745 return self._parse_sequence_properties() 1746 1747 return self.expression( 1748 exp.Property, 1749 this=key.to_dot() if isinstance(key, exp.Column) else key, 1750 value=self._parse_bitwise() or self._parse_var(any_token=True), 1751 ) 1752 1753 def _parse_stored(self) -> exp.FileFormatProperty: 1754 self._match(TokenType.ALIAS) 1755 1756 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1757 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1758 1759 return self.expression( 1760 exp.FileFormatProperty, 1761 this=( 1762 
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self):
        """Parse a field, demoting an unquoted Identifier to a plain Var."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse `[=|AS] value` and wrap it in the given expression class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into exp.Properties; None if none found."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parse [NO] FALLBACK [PROTECTION] (Teradata)."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: table property vs. function stability marker.

        Looks two tokens back: if the preceding token is in PRE_VOLATILE_TOKENS
        (e.g. CREATE), it's the table property; otherwise it's a stability marker.
        """
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        """Parse SYSTEM_VERSIONING [= ON [(HISTORY_TABLE = ..., ...)]] (T-SQL temporal tables)."""
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the various WITH-prefixed properties (wrapped list, JOURNAL, view attrs, DATA, ...)."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse DEFINER = user@host; None unless both parts are present."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse WITH JOURNAL TABLE = <table> (Teradata)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parse [NO] LOG (Teradata)."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Parse journal modifiers collected by _parse_property_before into a JournalProperty."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON|OFF|DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse a CLUSTER BY expression list, optionally parenthesized."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (cols)] INTO n BUCKETS (Hive)."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; backtracks over COPY when GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE = n [PERCENT] (Teradata)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO [= n [PERCENT]] | NO/DEFAULT MERGEBLOCKRATIO (Teradata)."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [= n [BYTES|KBYTES|KILOBYTES]] (Teradata)."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)] (Teradata)."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING [...]; backtracks fully on mismatch."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: kind, optional target, FOR/IN, lock type, OVERRIDE (Teradata)."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse PARTITION BY expressions; empty list when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a PostgreSQL partition bound spec: IN (...), FROM ... TO ..., or WITH (MODULUS..., REMAINDER...)."""
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords here, not columns.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions =
            self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF parent { DEFAULT | FOR VALUES <bound spec> } (PostgreSQL)."""
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY (schema or bracketed field list)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse CONTAINS SQL (routine data-access characteristic)."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse MODIFIES SQL DATA (routine data-access characteristic)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse NO-prefixed properties: NO PRIMARY INDEX, NO SQL."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON-prefixed properties: ON COMMIT PRESERVE/DELETE ROWS, or a generic ON target."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse READS SQL DATA (routine data-access characteristic)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY (col) (Redshift)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY (cols) (Redshift)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <charset>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <conn> (BigQuery remote models)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: scalar type, TABLE schema, or TABLE<...> (BigQuery)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [kind] [style] <table> [properties]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # A dot after the "style" keyword means it was actually the first
            # part of a qualified table name — undo and reparse as a table.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, including INSERT DIRECTORY, OR-alternatives, and ON CONFLICT."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' ...
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. SQLite INSERT OR REPLACE/IGNORE/...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION|QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ...; None when neither is present."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]; None when absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT ... (the ROW token has already been consumed)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '<class>' [WITH SERDEPROPERTIES (...)] or ROW FORMAT DELIMITED ... (Hive)."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
kwargs["escaped"] = self._parse_string() 2361 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2362 kwargs["collection_items"] = self._parse_string() 2363 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2364 kwargs["map_keys"] = self._parse_string() 2365 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2366 kwargs["lines"] = self._parse_string() 2367 if self._match_text_seq("NULL", "DEFINED", "AS"): 2368 kwargs["null"] = self._parse_string() 2369 2370 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2371 2372 def _parse_load(self) -> exp.LoadData | exp.Command: 2373 if self._match_text_seq("DATA"): 2374 local = self._match_text_seq("LOCAL") 2375 self._match_text_seq("INPATH") 2376 inpath = self._parse_string() 2377 overwrite = self._match(TokenType.OVERWRITE) 2378 self._match_pair(TokenType.INTO, TokenType.TABLE) 2379 2380 return self.expression( 2381 exp.LoadData, 2382 this=self._parse_table(schema=True), 2383 local=local, 2384 overwrite=overwrite, 2385 inpath=inpath, 2386 partition=self._parse_partition(), 2387 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2388 serde=self._match_text_seq("SERDE") and self._parse_string(), 2389 ) 2390 return self._parse_as_command(self._prev) 2391 2392 def _parse_delete(self) -> exp.Delete: 2393 # This handles MySQL's "Multiple-Table Syntax" 2394 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2395 tables = None 2396 comments = self._prev_comments 2397 if not self._match(TokenType.FROM, advance=False): 2398 tables = self._parse_csv(self._parse_table) or None 2399 2400 returning = self._parse_returning() 2401 2402 return self.expression( 2403 exp.Delete, 2404 comments=comments, 2405 tables=tables, 2406 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2407 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2408 where=self._parse_where(), 2409 returning=returning or self._parse_returning(), 2410 
limit=self._parse_limit(), 2411 ) 2412 2413 def _parse_update(self) -> exp.Update: 2414 comments = self._prev_comments 2415 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2416 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2417 returning = self._parse_returning() 2418 return self.expression( 2419 exp.Update, 2420 comments=comments, 2421 **{ # type: ignore 2422 "this": this, 2423 "expressions": expressions, 2424 "from": self._parse_from(joins=True), 2425 "where": self._parse_where(), 2426 "returning": returning or self._parse_returning(), 2427 "order": self._parse_order(), 2428 "limit": self._parse_limit(), 2429 }, 2430 ) 2431 2432 def _parse_uncache(self) -> exp.Uncache: 2433 if not self._match(TokenType.TABLE): 2434 self.raise_error("Expecting TABLE after UNCACHE") 2435 2436 return self.expression( 2437 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2438 ) 2439 2440 def _parse_cache(self) -> exp.Cache: 2441 lazy = self._match_text_seq("LAZY") 2442 self._match(TokenType.TABLE) 2443 table = self._parse_table(schema=True) 2444 2445 options = [] 2446 if self._match_text_seq("OPTIONS"): 2447 self._match_l_paren() 2448 k = self._parse_string() 2449 self._match(TokenType.EQ) 2450 v = self._parse_string() 2451 options = [k, v] 2452 self._match_r_paren() 2453 2454 self._match(TokenType.ALIAS) 2455 return self.expression( 2456 exp.Cache, 2457 this=table, 2458 lazy=lazy, 2459 options=options, 2460 expression=self._parse_select(nested=True), 2461 ) 2462 2463 def _parse_partition(self) -> t.Optional[exp.Partition]: 2464 if not self._match(TokenType.PARTITION): 2465 return None 2466 2467 return self.expression( 2468 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2469 ) 2470 2471 def _parse_value(self) -> exp.Tuple: 2472 if self._match(TokenType.L_PAREN): 2473 expressions = self._parse_csv(self._parse_expression) 2474 self._match_r_paren() 2475 return 
self.expression(exp.Tuple, expressions=expressions) 2476 2477 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2478 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2479 2480 def _parse_projections(self) -> t.List[exp.Expression]: 2481 return self._parse_expressions() 2482 2483 def _parse_select( 2484 self, 2485 nested: bool = False, 2486 table: bool = False, 2487 parse_subquery_alias: bool = True, 2488 parse_set_operation: bool = True, 2489 ) -> t.Optional[exp.Expression]: 2490 cte = self._parse_with() 2491 2492 if cte: 2493 this = self._parse_statement() 2494 2495 if not this: 2496 self.raise_error("Failed to parse any statement following CTE") 2497 return cte 2498 2499 if "with" in this.arg_types: 2500 this.set("with", cte) 2501 else: 2502 self.raise_error(f"{this.key} does not support CTE") 2503 this = cte 2504 2505 return this 2506 2507 # duckdb supports leading with FROM x 2508 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2509 2510 if self._match(TokenType.SELECT): 2511 comments = self._prev_comments 2512 2513 hint = self._parse_hint() 2514 all_ = self._match(TokenType.ALL) 2515 distinct = self._match_set(self.DISTINCT_TOKENS) 2516 2517 kind = ( 2518 self._match(TokenType.ALIAS) 2519 and self._match_texts(("STRUCT", "VALUE")) 2520 and self._prev.text.upper() 2521 ) 2522 2523 if distinct: 2524 distinct = self.expression( 2525 exp.Distinct, 2526 on=self._parse_value() if self._match(TokenType.ON) else None, 2527 ) 2528 2529 if all_ and distinct: 2530 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2531 2532 limit = self._parse_limit(top=True) 2533 projections = self._parse_projections() 2534 2535 this = self.expression( 2536 exp.Select, 2537 kind=kind, 2538 hint=hint, 2539 distinct=distinct, 2540 expressions=projections, 2541 limit=limit, 2542 ) 2543 this.comments = comments 2544 2545 into = self._parse_into() 2546 if into: 2547 this.set("into", 
into) 2548 2549 if not from_: 2550 from_ = self._parse_from() 2551 2552 if from_: 2553 this.set("from", from_) 2554 2555 this = self._parse_query_modifiers(this) 2556 elif (table or nested) and self._match(TokenType.L_PAREN): 2557 if self._match(TokenType.PIVOT): 2558 this = self._parse_simplified_pivot() 2559 elif self._match(TokenType.FROM): 2560 this = exp.select("*").from_( 2561 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2562 ) 2563 else: 2564 this = ( 2565 self._parse_table() 2566 if table 2567 else self._parse_select(nested=True, parse_set_operation=False) 2568 ) 2569 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2570 2571 self._match_r_paren() 2572 2573 # We return early here so that the UNION isn't attached to the subquery by the 2574 # following call to _parse_set_operations, but instead becomes the parent node 2575 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2576 elif self._match(TokenType.VALUES, advance=False): 2577 this = self._parse_derived_table_values() 2578 elif from_: 2579 this = exp.select("*").from_(from_.this, copy=False) 2580 else: 2581 this = None 2582 2583 if parse_set_operation: 2584 return self._parse_set_operations(this) 2585 return this 2586 2587 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2588 if not skip_with_token and not self._match(TokenType.WITH): 2589 return None 2590 2591 comments = self._prev_comments 2592 recursive = self._match(TokenType.RECURSIVE) 2593 2594 expressions = [] 2595 while True: 2596 expressions.append(self._parse_cte()) 2597 2598 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2599 break 2600 else: 2601 self._match(TokenType.WITH) 2602 2603 return self.expression( 2604 exp.With, comments=comments, expressions=expressions, recursive=recursive 2605 ) 2606 2607 def _parse_cte(self) -> exp.CTE: 2608 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2609 if not alias or not alias.this: 2610 
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        # Optional CTE materialization hint: [NOT] MATERIALIZED.
        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse an (optionally AS-prefixed) table alias with an optional column list."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # An empty paren group means this wasn't a column list - backtrack.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery node, attaching pivots and (optionally) an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        # Collect the names exposed by earlier FROM/JOIN sources; a later join
        # against one of those names is treated as an implicit UNNEST below.
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)
            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, WHERE, GROUP BY, LIMIT, ...) to `this`."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # Normalize a LIMIT that carried an offset into a
                            # separate Offset node, moving any BY expressions too.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            # Keep consuming comma-separated hint groups until none are found.
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] <table>, or return None."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause, or return None if absent."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        # A MEASURES entry with an optional FINAL/RUNNING window-frame prefix.
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause, or return None if absent."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # The ROWS PER MATCH variants are captured verbatim as a Var node.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        # AFTER MATCH SKIP variants, also captured verbatim.
        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured verbatim by scanning tokens up to the
            # matching closing paren, tracking nesting depth.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL, CROSS APPLY or OUTER APPLY, or return None if absent."""
        # cross_apply: True = CROSS APPLY, False = OUTER APPLY, None = LATERAL
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to an unnest, function call or (dotted) identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        # Returns the (method, side, kind) tokens of a join, each possibly None.
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a JOIN clause (including comma joins and APPLY), or return None."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)
        if not skip_join_token and not join:
            # Not actually a join - undo the method/side/kind consumption.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            # The ON/USING for this join may appear after nested join clauses;
            # try parsing those first and backtrack if no ON/USING follows.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the trailing options of an index definition (USING, columns, INCLUDE, ...)."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; `index` may be pre-parsed by the caller."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        # One component of a dotted table name; functions are disallowed in schema context.
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly dotted) table reference into a Table node."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            # Shift parts left: the last parsed part is the database, not a table.
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
            pivots=self._parse_pivots(),
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any FROM-clause source: lateral, unnest, VALUES, subquery or a table name."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
3188 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3189 this.set("partition", self._parse_partition()) 3190 3191 if schema: 3192 return self._parse_schema(this=this) 3193 3194 version = self._parse_version() 3195 3196 if version: 3197 this.set("version", version) 3198 3199 if self.dialect.ALIAS_POST_TABLESAMPLE: 3200 table_sample = self._parse_table_sample() 3201 3202 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3203 if alias: 3204 this.set("alias", alias) 3205 3206 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3207 return self.expression( 3208 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3209 ) 3210 3211 this.set("hints", self._parse_table_hints()) 3212 3213 if not this.args.get("pivots"): 3214 this.set("pivots", self._parse_pivots()) 3215 3216 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3217 table_sample = self._parse_table_sample() 3218 3219 if table_sample: 3220 table_sample.set("this", this) 3221 this = table_sample 3222 3223 if joins: 3224 for join in self._parse_joins(): 3225 this.append("joins", join) 3226 3227 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3228 this.set("ordinality", True) 3229 this.set("alias", self._parse_table_alias()) 3230 3231 return this 3232 3233 def _parse_version(self) -> t.Optional[exp.Version]: 3234 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3235 this = "TIMESTAMP" 3236 elif self._match(TokenType.VERSION_SNAPSHOT): 3237 this = "VERSION" 3238 else: 3239 return None 3240 3241 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3242 kind = self._prev.text.upper() 3243 start = self._parse_bitwise() 3244 self._match_texts(("TO", "AND")) 3245 end = self._parse_bitwise() 3246 expression: t.Optional[exp.Expression] = self.expression( 3247 exp.Tuple, expressions=[start, end] 3248 ) 3249 elif self._match_text_seq("CONTAINED", "IN"): 3250 kind = "CONTAINED IN" 3251 expression = self.expression( 3252 
exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3253 ) 3254 elif self._match(TokenType.ALL): 3255 kind = "ALL" 3256 expression = None 3257 else: 3258 self._match_text_seq("AS", "OF") 3259 kind = "AS OF" 3260 expression = self._parse_type() 3261 3262 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3263 3264 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3265 if not self._match(TokenType.UNNEST): 3266 return None 3267 3268 expressions = self._parse_wrapped_csv(self._parse_equality) 3269 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3270 3271 alias = self._parse_table_alias() if with_alias else None 3272 3273 if alias: 3274 if self.dialect.UNNEST_COLUMN_ONLY: 3275 if alias.args.get("columns"): 3276 self.raise_error("Unexpected extra column alias in unnest.") 3277 3278 alias.set("columns", [alias.this]) 3279 alias.set("this", None) 3280 3281 columns = alias.args.get("columns") or [] 3282 if offset and len(expressions) < len(columns): 3283 offset = columns.pop() 3284 3285 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3286 self._match(TokenType.ALIAS) 3287 offset = self._parse_id_var( 3288 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3289 ) or exp.to_identifier("offset") 3290 3291 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3292 3293 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3294 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3295 if not is_derived and not self._match_text_seq("VALUES"): 3296 return None 3297 3298 expressions = self._parse_csv(self._parse_value) 3299 alias = self._parse_table_alias() 3300 3301 if is_derived: 3302 self._match_r_paren() 3303 3304 return self.expression( 3305 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3306 ) 3307 3308 def _parse_table_sample(self, as_modifier: bool = False) -> 
t.Optional[exp.TableSample]: 3309 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3310 as_modifier and self._match_text_seq("USING", "SAMPLE") 3311 ): 3312 return None 3313 3314 bucket_numerator = None 3315 bucket_denominator = None 3316 bucket_field = None 3317 percent = None 3318 size = None 3319 seed = None 3320 3321 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3322 matched_l_paren = self._match(TokenType.L_PAREN) 3323 3324 if self.TABLESAMPLE_CSV: 3325 num = None 3326 expressions = self._parse_csv(self._parse_primary) 3327 else: 3328 expressions = None 3329 num = ( 3330 self._parse_factor() 3331 if self._match(TokenType.NUMBER, advance=False) 3332 else self._parse_primary() or self._parse_placeholder() 3333 ) 3334 3335 if self._match_text_seq("BUCKET"): 3336 bucket_numerator = self._parse_number() 3337 self._match_text_seq("OUT", "OF") 3338 bucket_denominator = bucket_denominator = self._parse_number() 3339 self._match(TokenType.ON) 3340 bucket_field = self._parse_field() 3341 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3342 percent = num 3343 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3344 size = num 3345 else: 3346 percent = num 3347 3348 if matched_l_paren: 3349 self._match_r_paren() 3350 3351 if self._match(TokenType.L_PAREN): 3352 method = self._parse_var(upper=True) 3353 seed = self._match(TokenType.COMMA) and self._parse_number() 3354 self._match_r_paren() 3355 elif self._match_texts(("SEED", "REPEATABLE")): 3356 seed = self._parse_wrapped(self._parse_number) 3357 3358 return self.expression( 3359 exp.TableSample, 3360 expressions=expressions, 3361 method=method, 3362 bucket_numerator=bucket_numerator, 3363 bucket_denominator=bucket_denominator, 3364 bucket_field=bucket_field, 3365 percent=percent, 3366 size=size, 3367 seed=seed, 3368 ) 3369 3370 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3371 return list(iter(self._parse_pivot, None)) or None 3372 3373 def 
_parse_joins(self) -> t.Iterator[exp.Join]: 3374 return iter(self._parse_join, None) 3375 3376 # https://duckdb.org/docs/sql/statements/pivot 3377 def _parse_simplified_pivot(self) -> exp.Pivot: 3378 def _parse_on() -> t.Optional[exp.Expression]: 3379 this = self._parse_bitwise() 3380 return self._parse_in(this) if self._match(TokenType.IN) else this 3381 3382 this = self._parse_table() 3383 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3384 using = self._match(TokenType.USING) and self._parse_csv( 3385 lambda: self._parse_alias(self._parse_function()) 3386 ) 3387 group = self._parse_group() 3388 return self.expression( 3389 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3390 ) 3391 3392 def _parse_pivot_in(self) -> exp.In: 3393 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3394 this = self._parse_conjunction() 3395 3396 self._match(TokenType.ALIAS) 3397 alias = self._parse_field() 3398 if alias: 3399 return self.expression(exp.PivotAlias, this=this, alias=alias) 3400 3401 return this 3402 3403 value = self._parse_column() 3404 3405 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3406 self.raise_error("Expecting IN (") 3407 3408 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3409 3410 self._match_r_paren() 3411 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3412 3413 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3414 index = self._index 3415 include_nulls = None 3416 3417 if self._match(TokenType.PIVOT): 3418 unpivot = False 3419 elif self._match(TokenType.UNPIVOT): 3420 unpivot = True 3421 3422 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3423 if self._match_text_seq("INCLUDE", "NULLS"): 3424 include_nulls = True 3425 elif self._match_text_seq("EXCLUDE", "NULLS"): 3426 include_nulls = False 3427 else: 3428 return None 3429 3430 expressions = [] 3431 3432 if not 
self._match(TokenType.L_PAREN): 3433 self._retreat(index) 3434 return None 3435 3436 if unpivot: 3437 expressions = self._parse_csv(self._parse_column) 3438 else: 3439 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3440 3441 if not expressions: 3442 self.raise_error("Failed to parse PIVOT's aggregation list") 3443 3444 if not self._match(TokenType.FOR): 3445 self.raise_error("Expecting FOR") 3446 3447 field = self._parse_pivot_in() 3448 3449 self._match_r_paren() 3450 3451 pivot = self.expression( 3452 exp.Pivot, 3453 expressions=expressions, 3454 field=field, 3455 unpivot=unpivot, 3456 include_nulls=include_nulls, 3457 ) 3458 3459 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3460 pivot.set("alias", self._parse_table_alias()) 3461 3462 if not unpivot: 3463 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3464 3465 columns: t.List[exp.Expression] = [] 3466 for fld in pivot.args["field"].expressions: 3467 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3468 for name in names: 3469 if self.PREFIXED_PIVOT_COLUMNS: 3470 name = f"{name}_{field_name}" if name else field_name 3471 else: 3472 name = f"{field_name}_{name}" if name else field_name 3473 3474 columns.append(exp.to_identifier(name)) 3475 3476 pivot.set("columns", columns) 3477 3478 return pivot 3479 3480 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3481 return [agg.alias for agg in aggregations] 3482 3483 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3484 if not skip_where_token and not self._match(TokenType.PREWHERE): 3485 return None 3486 3487 return self.expression( 3488 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3489 ) 3490 3491 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3492 if not skip_where_token and not self._match(TokenType.WHERE): 
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with ALL/DISTINCT, GROUPING SETS, ROLLUP, CUBE and WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; plain ROLLUP takes one.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # WITH wasn't followed by ROLLUP/CUBE/TOTALS - give it back.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse CONNECT BY / START WITH hierarchical-query clauses."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # Register PRIOR as a no-paren function only while parsing the CONNECT BY
        # condition, then remove it again.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        # Parses `name AS expr` (alias first), as used by DEFINE and INTERPOLATE.
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term with direction, NULLS ordering and WITH FILL."""
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        # desc is False when ASC was given explicitly, None when no direction was
        # given at all - the `or (asc and False)` preserves that distinction.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When NULLS FIRST/LAST wasn't given, derive the default from the
        # dialect's NULL_ORDERING setting and the sort direction.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT/TOP clause, or a FETCH [FIRST|NEXT] clause."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # LIMIT <offset>, <count> form: the first term was the offset.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT operations onto `this`."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            # Hoist modifiers from the right-hand side onto the Union itself.
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def
_parse_expression(self) -> t.Optional[exp.Expression]: 3806 return self._parse_alias(self._parse_conjunction()) 3807 3808 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3809 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3810 3811 def _parse_equality(self) -> t.Optional[exp.Expression]: 3812 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3813 3814 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3815 return self._parse_tokens(self._parse_range, self.COMPARISON) 3816 3817 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3818 this = this or self._parse_bitwise() 3819 negate = self._match(TokenType.NOT) 3820 3821 if self._match_set(self.RANGE_PARSERS): 3822 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3823 if not expression: 3824 return this 3825 3826 this = expression 3827 elif self._match(TokenType.ISNULL): 3828 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3829 3830 # Postgres supports ISNULL and NOTNULL for conditions. 
3831 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3832 if self._match(TokenType.NOTNULL): 3833 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3834 this = self.expression(exp.Not, this=this) 3835 3836 if negate: 3837 this = self.expression(exp.Not, this=this) 3838 3839 if self._match(TokenType.IS): 3840 this = self._parse_is(this) 3841 3842 return this 3843 3844 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3845 index = self._index - 1 3846 negate = self._match(TokenType.NOT) 3847 3848 if self._match_text_seq("DISTINCT", "FROM"): 3849 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3850 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3851 3852 expression = self._parse_null() or self._parse_boolean() 3853 if not expression: 3854 self._retreat(index) 3855 return None 3856 3857 this = self.expression(exp.Is, this=this, expression=expression) 3858 return self.expression(exp.Not, this=this) if negate else this 3859 3860 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3861 unnest = self._parse_unnest(with_alias=False) 3862 if unnest: 3863 this = self.expression(exp.In, this=this, unnest=unnest) 3864 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3865 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3866 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3867 3868 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3869 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3870 else: 3871 this = self.expression(exp.In, this=this, expressions=expressions) 3872 3873 if matched_l_paren: 3874 self._match_r_paren(this) 3875 elif not self._match(TokenType.R_BRACKET, expression=this): 3876 self.raise_error("Expecting ]") 3877 else: 3878 this = self.expression(exp.In, this=this, field=self._parse_field()) 3879 3880 return this 
3881 3882 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3883 low = self._parse_bitwise() 3884 self._match(TokenType.AND) 3885 high = self._parse_bitwise() 3886 return self.expression(exp.Between, this=this, low=low, high=high) 3887 3888 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3889 if not self._match(TokenType.ESCAPE): 3890 return this 3891 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3892 3893 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3894 index = self._index 3895 3896 if not self._match(TokenType.INTERVAL) and match_interval: 3897 return None 3898 3899 if self._match(TokenType.STRING, advance=False): 3900 this = self._parse_primary() 3901 else: 3902 this = self._parse_term() 3903 3904 if not this or ( 3905 isinstance(this, exp.Column) 3906 and not this.table 3907 and not this.this.quoted 3908 and this.name.upper() == "IS" 3909 ): 3910 self._retreat(index) 3911 return None 3912 3913 unit = self._parse_function() or ( 3914 not self._match(TokenType.ALIAS, advance=False) 3915 and self._parse_var(any_token=True, upper=True) 3916 ) 3917 3918 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3919 # each INTERVAL expression into this canonical form so it's easy to transpile 3920 if this and this.is_number: 3921 this = exp.Literal.string(this.name) 3922 elif this and this.is_string: 3923 parts = this.name.split() 3924 3925 if len(parts) == 2: 3926 if unit: 3927 # This is not actually a unit, it's something else (e.g. 
a "window side") 3928 unit = None 3929 self._retreat(self._index - 1) 3930 3931 this = exp.Literal.string(parts[0]) 3932 unit = self.expression(exp.Var, this=parts[1].upper()) 3933 3934 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3935 unit = self.expression( 3936 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3937 ) 3938 3939 return self.expression(exp.Interval, this=this, unit=unit) 3940 3941 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3942 this = self._parse_term() 3943 3944 while True: 3945 if self._match_set(self.BITWISE): 3946 this = self.expression( 3947 self.BITWISE[self._prev.token_type], 3948 this=this, 3949 expression=self._parse_term(), 3950 ) 3951 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3952 this = self.expression( 3953 exp.DPipe, 3954 this=this, 3955 expression=self._parse_term(), 3956 safe=not self.dialect.STRICT_STRING_CONCAT, 3957 ) 3958 elif self._match(TokenType.DQMARK): 3959 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3960 elif self._match_pair(TokenType.LT, TokenType.LT): 3961 this = self.expression( 3962 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3963 ) 3964 elif self._match_pair(TokenType.GT, TokenType.GT): 3965 this = self.expression( 3966 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3967 ) 3968 else: 3969 break 3970 3971 return this 3972 3973 def _parse_term(self) -> t.Optional[exp.Expression]: 3974 return self._parse_tokens(self._parse_factor, self.TERM) 3975 3976 def _parse_factor(self) -> t.Optional[exp.Expression]: 3977 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3978 this = parse_method() 3979 3980 while self._match_set(self.FACTOR): 3981 this = self.expression( 3982 self.FACTOR[self._prev.token_type], 3983 this=this, 3984 comments=self._prev_comments, 3985 expression=parse_method(), 3986 ) 3987 if isinstance(this, exp.Div): 3988 
this.args["typed"] = self.dialect.TYPED_DIVISION 3989 this.args["safe"] = self.dialect.SAFE_DIVISION 3990 3991 return this 3992 3993 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3994 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3995 3996 def _parse_unary(self) -> t.Optional[exp.Expression]: 3997 if self._match_set(self.UNARY_PARSERS): 3998 return self.UNARY_PARSERS[self._prev.token_type](self) 3999 return self._parse_at_time_zone(self._parse_type()) 4000 4001 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 4002 interval = parse_interval and self._parse_interval() 4003 if interval: 4004 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4005 while True: 4006 index = self._index 4007 self._match(TokenType.PLUS) 4008 4009 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4010 self._retreat(index) 4011 break 4012 4013 interval = self.expression( # type: ignore 4014 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4015 ) 4016 4017 return interval 4018 4019 index = self._index 4020 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4021 this = self._parse_column() 4022 4023 if data_type: 4024 if isinstance(this, exp.Literal): 4025 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4026 if parser: 4027 return parser(self, this, data_type) 4028 return self.expression(exp.Cast, this=this, to=data_type) 4029 if not data_type.expressions: 4030 self._retreat(index) 4031 return self._parse_column() 4032 return self._parse_column_ops(data_type) 4033 4034 return this and self._parse_column_ops(this) 4035 4036 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4037 this = self._parse_type() 4038 if not this: 4039 return None 4040 4041 if isinstance(this, exp.Column) and not this.table: 4042 this = exp.var(this.name.upper()) 4043 4044 return self.expression( 4045 exp.DataTypeParam, this=this, 
expression=self._parse_var(any_token=True) 4046 ) 4047 4048 def _parse_types( 4049 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4050 ) -> t.Optional[exp.Expression]: 4051 index = self._index 4052 4053 prefix = self._match_text_seq("SYSUDTLIB", ".") 4054 4055 if not self._match_set(self.TYPE_TOKENS): 4056 identifier = allow_identifiers and self._parse_id_var( 4057 any_token=False, tokens=(TokenType.VAR,) 4058 ) 4059 if identifier: 4060 tokens = self.dialect.tokenize(identifier.name) 4061 4062 if len(tokens) != 1: 4063 self.raise_error("Unexpected identifier", self._prev) 4064 4065 if tokens[0].token_type in self.TYPE_TOKENS: 4066 self._prev = tokens[0] 4067 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4068 type_name = identifier.name 4069 4070 while self._match(TokenType.DOT): 4071 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4072 4073 return exp.DataType.build(type_name, udt=True) 4074 else: 4075 self._retreat(self._index - 1) 4076 return None 4077 else: 4078 return None 4079 4080 type_token = self._prev.token_type 4081 4082 if type_token == TokenType.PSEUDO_TYPE: 4083 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4084 4085 if type_token == TokenType.OBJECT_IDENTIFIER: 4086 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4087 4088 nested = type_token in self.NESTED_TYPE_TOKENS 4089 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4090 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4091 expressions = None 4092 maybe_func = False 4093 4094 if self._match(TokenType.L_PAREN): 4095 if is_struct: 4096 expressions = self._parse_csv(self._parse_struct_types) 4097 elif nested: 4098 expressions = self._parse_csv( 4099 lambda: self._parse_types( 4100 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4101 ) 4102 ) 4103 elif type_token in self.ENUM_TYPE_TOKENS: 4104 expressions = self._parse_csv(self._parse_equality) 4105 elif 
is_aggregate: 4106 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4107 any_token=False, tokens=(TokenType.VAR,) 4108 ) 4109 if not func_or_ident or not self._match(TokenType.COMMA): 4110 return None 4111 expressions = self._parse_csv( 4112 lambda: self._parse_types( 4113 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4114 ) 4115 ) 4116 expressions.insert(0, func_or_ident) 4117 else: 4118 expressions = self._parse_csv(self._parse_type_size) 4119 4120 if not expressions or not self._match(TokenType.R_PAREN): 4121 self._retreat(index) 4122 return None 4123 4124 maybe_func = True 4125 4126 this: t.Optional[exp.Expression] = None 4127 values: t.Optional[t.List[exp.Expression]] = None 4128 4129 if nested and self._match(TokenType.LT): 4130 if is_struct: 4131 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4132 else: 4133 expressions = self._parse_csv( 4134 lambda: self._parse_types( 4135 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4136 ) 4137 ) 4138 4139 if not self._match(TokenType.GT): 4140 self.raise_error("Expecting >") 4141 4142 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4143 values = self._parse_csv(self._parse_conjunction) 4144 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4145 4146 if type_token in self.TIMESTAMPS: 4147 if self._match_text_seq("WITH", "TIME", "ZONE"): 4148 maybe_func = False 4149 tz_type = ( 4150 exp.DataType.Type.TIMETZ 4151 if type_token in self.TIMES 4152 else exp.DataType.Type.TIMESTAMPTZ 4153 ) 4154 this = exp.DataType(this=tz_type, expressions=expressions) 4155 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4156 maybe_func = False 4157 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4158 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4159 maybe_func = False 4160 elif type_token == TokenType.INTERVAL: 4161 unit = 
self._parse_var(upper=True) 4162 if unit: 4163 if self._match_text_seq("TO"): 4164 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4165 4166 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4167 else: 4168 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4169 4170 if maybe_func and check_func: 4171 index2 = self._index 4172 peek = self._parse_string() 4173 4174 if not peek: 4175 self._retreat(index) 4176 return None 4177 4178 self._retreat(index2) 4179 4180 if not this: 4181 if self._match_text_seq("UNSIGNED"): 4182 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4183 if not unsigned_type_token: 4184 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4185 4186 type_token = unsigned_type_token or type_token 4187 4188 this = exp.DataType( 4189 this=exp.DataType.Type[type_token.value], 4190 expressions=expressions, 4191 nested=nested, 4192 values=values, 4193 prefix=prefix, 4194 ) 4195 4196 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4197 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4198 4199 return this 4200 4201 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4202 index = self._index 4203 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4204 self._match(TokenType.COLON) 4205 column_def = self._parse_column_def(this) 4206 4207 if type_required and ( 4208 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4209 ): 4210 self._retreat(index) 4211 return self._parse_types() 4212 4213 return column_def 4214 4215 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4216 if not self._match_text_seq("AT", "TIME", "ZONE"): 4217 return this 4218 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4219 4220 def _parse_column(self) -> 
t.Optional[exp.Expression]: 4221 this = self._parse_column_reference() 4222 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4223 4224 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4225 this = self._parse_field() 4226 if ( 4227 not this 4228 and self._match(TokenType.VALUES, advance=False) 4229 and self.VALUES_FOLLOWED_BY_PAREN 4230 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4231 ): 4232 this = self._parse_id_var() 4233 4234 if isinstance(this, exp.Identifier): 4235 # We bubble up comments from the Identifier to the Column 4236 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4237 4238 return this 4239 4240 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4241 this = self._parse_bracket(this) 4242 4243 while self._match_set(self.COLUMN_OPERATORS): 4244 op_token = self._prev.token_type 4245 op = self.COLUMN_OPERATORS.get(op_token) 4246 4247 if op_token == TokenType.DCOLON: 4248 field = self._parse_types() 4249 if not field: 4250 self.raise_error("Expected type") 4251 elif op and self._curr: 4252 field = self._parse_column_reference() 4253 else: 4254 field = self._parse_field(any_token=True, anonymous_func=True) 4255 4256 if isinstance(field, exp.Func) and this: 4257 # bigquery allows function calls like x.y.count(...) 4258 # SAFE.SUBSTR(...) 
4259 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4260 this = exp.replace_tree( 4261 this, 4262 lambda n: ( 4263 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4264 if n.table 4265 else n.this 4266 ) 4267 if isinstance(n, exp.Column) 4268 else n, 4269 ) 4270 4271 if op: 4272 this = op(self, this, field) 4273 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4274 this = self.expression( 4275 exp.Column, 4276 this=field, 4277 table=this.this, 4278 db=this.args.get("table"), 4279 catalog=this.args.get("db"), 4280 ) 4281 else: 4282 this = self.expression(exp.Dot, this=this, expression=field) 4283 this = self._parse_bracket(this) 4284 return this 4285 4286 def _parse_primary(self) -> t.Optional[exp.Expression]: 4287 if self._match_set(self.PRIMARY_PARSERS): 4288 token_type = self._prev.token_type 4289 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4290 4291 if token_type == TokenType.STRING: 4292 expressions = [primary] 4293 while self._match(TokenType.STRING): 4294 expressions.append(exp.Literal.string(self._prev.text)) 4295 4296 if len(expressions) > 1: 4297 return self.expression(exp.Concat, expressions=expressions) 4298 4299 return primary 4300 4301 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4302 return exp.Literal.number(f"0.{self._prev.text}") 4303 4304 if self._match(TokenType.L_PAREN): 4305 comments = self._prev_comments 4306 query = self._parse_select() 4307 4308 if query: 4309 expressions = [query] 4310 else: 4311 expressions = self._parse_expressions() 4312 4313 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4314 4315 if isinstance(this, exp.UNWRAPPED_QUERIES): 4316 this = self._parse_set_operations( 4317 self._parse_subquery(this=this, parse_alias=False) 4318 ) 4319 elif isinstance(this, exp.Subquery): 4320 this = self._parse_subquery( 4321 this=self._parse_set_operations(this), parse_alias=False 4322 ) 4323 elif 
len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4324 this = self.expression(exp.Tuple, expressions=expressions) 4325 else: 4326 this = self.expression(exp.Paren, this=this) 4327 4328 if this: 4329 this.add_comments(comments) 4330 4331 self._match_r_paren(expression=this) 4332 return this 4333 4334 return None 4335 4336 def _parse_field( 4337 self, 4338 any_token: bool = False, 4339 tokens: t.Optional[t.Collection[TokenType]] = None, 4340 anonymous_func: bool = False, 4341 ) -> t.Optional[exp.Expression]: 4342 if anonymous_func: 4343 field = ( 4344 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4345 or self._parse_primary() 4346 ) 4347 else: 4348 field = self._parse_primary() or self._parse_function( 4349 anonymous=anonymous_func, any_token=any_token 4350 ) 4351 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4352 4353 def _parse_function( 4354 self, 4355 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4356 anonymous: bool = False, 4357 optional_parens: bool = True, 4358 any_token: bool = False, 4359 ) -> t.Optional[exp.Expression]: 4360 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4361 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4362 fn_syntax = False 4363 if ( 4364 self._match(TokenType.L_BRACE, advance=False) 4365 and self._next 4366 and self._next.text.upper() == "FN" 4367 ): 4368 self._advance(2) 4369 fn_syntax = True 4370 4371 func = self._parse_function_call( 4372 functions=functions, 4373 anonymous=anonymous, 4374 optional_parens=optional_parens, 4375 any_token=any_token, 4376 ) 4377 4378 if fn_syntax: 4379 self._match(TokenType.R_BRACE) 4380 4381 return func 4382 4383 def _parse_function_call( 4384 self, 4385 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4386 anonymous: bool = False, 4387 optional_parens: bool = True, 4388 any_token: bool = False, 4389 ) -> t.Optional[exp.Expression]: 4390 if not self._curr: 4391 
return None 4392 4393 comments = self._curr.comments 4394 token_type = self._curr.token_type 4395 this = self._curr.text 4396 upper = this.upper() 4397 4398 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4399 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4400 self._advance() 4401 return self._parse_window(parser(self)) 4402 4403 if not self._next or self._next.token_type != TokenType.L_PAREN: 4404 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4405 self._advance() 4406 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4407 4408 return None 4409 4410 if any_token: 4411 if token_type in self.RESERVED_TOKENS: 4412 return None 4413 elif token_type not in self.FUNC_TOKENS: 4414 return None 4415 4416 self._advance(2) 4417 4418 parser = self.FUNCTION_PARSERS.get(upper) 4419 if parser and not anonymous: 4420 this = parser(self) 4421 else: 4422 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4423 4424 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4425 this = self.expression(subquery_predicate, this=self._parse_select()) 4426 self._match_r_paren() 4427 return this 4428 4429 if functions is None: 4430 functions = self.FUNCTIONS 4431 4432 function = functions.get(upper) 4433 4434 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4435 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4436 4437 if alias: 4438 args = self._kv_to_prop_eq(args) 4439 4440 if function and not anonymous: 4441 if "dialect" in function.__code__.co_varnames: 4442 func = function(args, dialect=self.dialect) 4443 else: 4444 func = function(args) 4445 4446 func = self.validate_expression(func, args) 4447 if not self.dialect.NORMALIZE_FUNCTIONS: 4448 func.meta["name"] = this 4449 4450 this = func 4451 else: 4452 if token_type == TokenType.IDENTIFIER: 4453 this = exp.Identifier(this=this, quoted=True) 4454 this = self.expression(exp.Anonymous, this=this, expressions=args) 4455 
4456 if isinstance(this, exp.Expression): 4457 this.add_comments(comments) 4458 4459 self._match_r_paren(this) 4460 return self._parse_window(this) 4461 4462 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4463 transformed = [] 4464 4465 for e in expressions: 4466 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4467 if isinstance(e, exp.Alias): 4468 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4469 4470 if not isinstance(e, exp.PropertyEQ): 4471 e = self.expression( 4472 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4473 ) 4474 4475 if isinstance(e.this, exp.Column): 4476 e.this.replace(e.this.this) 4477 4478 transformed.append(e) 4479 4480 return transformed 4481 4482 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4483 return self._parse_column_def(self._parse_id_var()) 4484 4485 def _parse_user_defined_function( 4486 self, kind: t.Optional[TokenType] = None 4487 ) -> t.Optional[exp.Expression]: 4488 this = self._parse_id_var() 4489 4490 while self._match(TokenType.DOT): 4491 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4492 4493 if not self._match(TokenType.L_PAREN): 4494 return this 4495 4496 expressions = self._parse_csv(self._parse_function_parameter) 4497 self._match_r_paren() 4498 return self.expression( 4499 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4500 ) 4501 4502 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4503 literal = self._parse_primary() 4504 if literal: 4505 return self.expression(exp.Introducer, this=token.text, expression=literal) 4506 4507 return self.expression(exp.Identifier, this=token.text) 4508 4509 def _parse_session_parameter(self) -> exp.SessionParameter: 4510 kind = None 4511 this = self._parse_id_var() or self._parse_primary() 4512 4513 if this and self._match(TokenType.DOT): 4514 kind = this.name 4515 this = 
self._parse_var() or self._parse_primary() 4516 4517 return self.expression(exp.SessionParameter, this=this, kind=kind) 4518 4519 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4520 index = self._index 4521 4522 if self._match(TokenType.L_PAREN): 4523 expressions = t.cast( 4524 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4525 ) 4526 4527 if not self._match(TokenType.R_PAREN): 4528 self._retreat(index) 4529 else: 4530 expressions = [self._parse_id_var()] 4531 4532 if self._match_set(self.LAMBDAS): 4533 return self.LAMBDAS[self._prev.token_type](self, expressions) 4534 4535 self._retreat(index) 4536 4537 this: t.Optional[exp.Expression] 4538 4539 if self._match(TokenType.DISTINCT): 4540 this = self.expression( 4541 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4542 ) 4543 else: 4544 this = self._parse_select_or_expression(alias=alias) 4545 4546 return self._parse_limit( 4547 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4548 ) 4549 4550 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4551 index = self._index 4552 if not self._match(TokenType.L_PAREN): 4553 return this 4554 4555 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one field definition: a field name followed by an optional column definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type and inline constraints that follow a column name.

        Returns `this` unchanged when neither a type nor any constraint follows;
        otherwise wraps everything in an exp.ColumnDef.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        # Computed columns: `<name> AS <expr>` (no type) or `<name> ALIAS|MATERIALIZED <expr>`
        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            # `<name> <type> AS (<transform expr>)`
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        # Collect any remaining inline column constraints until none match
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, with optional (start, increment) or START .. INCREMENT .. args."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        # Only a fully specified (start, increment) pair upgrades to an identity constraint
        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse `AUTO REFRESH <var>`; backtrack one token when REFRESH doesn't follow."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint: either a wrapped list or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED [ALWAYS | BY DEFAULT] AS {ROW ... | [IDENTITY] (...)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW [START | END] [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # No IDENTITY keyword: the parenthesized value is a generated expression
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare `IDENTITY(start, increment)` shorthand
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse `INLINE [LENGTH] <expr>`."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL / CASESPECIFIC / FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally CONSTRAINT-named) column constraint via CONSTRAINT_PARSERS."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; named ones start with CONSTRAINT <name>."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or bare function calls) until none match."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single unnamed constraint keyword and dispatch to its parser."""
        # An IDENTIFIER here is a column name, not a constraint keyword
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(cols)] [USING <index type>] [ON CONFLICT ...]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as raw strings (ON <event> <action>, DEFERRABLE, ...)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. the DELETE/UPDATE keyword)
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; match=False assumes REFERENCES was already consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        # Referenced columns are parsed as part of the table schema, not separately
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-word action (e.g. CASCADE / RESTRICT)
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start_col, end_col); backtrack if absent."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as either a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # No parenthesized column list (and not in properties): a column-level constraint
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one bracketed element: an expression with optional explicit alias and slice."""
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts / array literals and {...} struct literals."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not
self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Recurse to support chained subscripts, e.g. x[0][1]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice when a `:` follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # `ELSE interval END` may have parsed END as part of an interval; undo that
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF: the function form IF(...) or the keyword form IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            # Statement-initial IF is handled as a command in some dialects
            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; backtrack if absent."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>); a comma is also accepted as the separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>])."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # `CAST(x, 'type string')` variant
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST with FORMAT to a temporal type is rewritten to STR_TO_DATE / STR_TO_TIME
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style aggregate arguments, including WITHIN GROUP."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...
]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing odd element is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: also matches when both sides are NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse [KEY] <key> <sep> [VALUE] <value> inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson when FORMAT JSON follows."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse the interior of JSON_OBJECT / JSON_OBJECTAGG."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse COLUMNS(<json column defs>) into a JSONSchema node."""
self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(<expr> [, path] [<error/empty handling>] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH(cols) AGAINST (<expr> [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column of the WITH (...) clause: <name> <type> [<path>] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments, honoring per-dialect argument order."""
        args = self._parse_csv(self._parse_bitwise)

        # POSITION(<substr> IN <string>) form
        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT-style arguments: MODEL <table>, TABLE <table> [, <params>]."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Wrap a hint function's table list in a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # After FROM (or with TRIM_PATTERN_FIRST) the operands are in swapped order
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a trailing WINDOW clause: WINDOW <name> AS (...), ..."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named window definition: <name> AS (<window spec>)."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls when those keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `HAVING MAX|MIN <column>` following `this`."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this:
t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse FILTER / WITHIN GROUP / IGNORE|RESPECT NULLS / OVER modifiers after `this`."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Re-root the IGNORE/RESPECT NULLS wrapper around the whole aggregate
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # `OVER <window name>` without parens: a reference to a named window
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse [PARTITION BY ...] [ORDER BY ...] inside a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus an optional side keyword."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias for `this`; with explicit=True only an AS-prefixed alias counts."""
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an Identifier node."""
        expression =
self._parse_identifier()
        # (continuation of _parse_id_var, whose definition begins above this chunk)
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        # Dispatch on the token-type-specific string parser; otherwise fall back
        # to placeholder syntax (e.g. "?" or ":name").
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # A string literal in identifier position becomes a quoted identifier.
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        # Parses a bare "variable" token. With any_token=True any non-reserved
        # token is accepted; `tokens` widens the accepted token-type set.
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consume and return the current token, unless it is reserved.
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        # Parses {name} / {name:kind} style parameters; the braces are optional.
        self._match(TokenType.L_BRACE)
        this = self._parse_identifier() or self._parse_primary_or_var()
        expression = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The sub-parser declined: put the consumed token back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        # EXCEPT (<cols>) or a single bare EXCEPT <col>.
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        # REPLACE (<exprs>) or a single bare REPLACE <expr>.
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`; None results are dropped."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments collected at the separator to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold: a OP b OP c => OP(OP(a, b), c), where `expressions`
        # maps separator token types to expression classes.
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Parse `( ... )`; the parentheses may be omitted when `optional` is True.
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        # [<kind>] [TRANSACTION | WORK] [<mode> [, <mode> ...]]
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A single mode may span several VAR tokens, e.g. "ISOLATION LEVEL ...".
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND [NO] CHAIN
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            # Default the drop kind to COLUMN when none was parsed.
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        # The ADD keyword was consumed by the dispatcher, hence the index - 1.
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        # ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT <expr> | COMMENT <str>
        #                       | [SET DATA] TYPE <type> [COLLATE ...] [USING ...]}
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            # RENAME COLUMN [IF EXISTS] <old> TO <new>
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            # Only ALTER TABLE is structurally parsed; anything else becomes a Command.
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only build an AlterTable if all tokens were consumed; otherwise
            # fall back to an opaque Command below.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                )

        return self._parse_as_command(start)
    def _parse_merge(self) -> exp.Merge:
        # MERGE [INTO] <target> [AS alias] USING <source> ON <cond> WHEN ...
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND <cond>] THEN ... clauses."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, None if absent.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not an assignment after all - rewind so the caller can try other parses.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, so it must be passed via unpacking.
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Leftover tokens: the SET could not be fully parsed - treat it as a raw command.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_
self._index 5992 set_ = self.expression( 5993 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5994 ) 5995 5996 if self._curr: 5997 self._retreat(index) 5998 return self._parse_as_command(self._prev) 5999 6000 return set_ 6001 6002 def _parse_var_from_options( 6003 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6004 ) -> t.Optional[exp.Var]: 6005 start = self._curr 6006 if not start: 6007 return None 6008 6009 option = start.text.upper() 6010 continuations = options.get(option) 6011 6012 index = self._index 6013 self._advance() 6014 for keywords in continuations or []: 6015 if isinstance(keywords, str): 6016 keywords = (keywords,) 6017 6018 if self._match_text_seq(*keywords): 6019 option = f"{option} {' '.join(keywords)}" 6020 break 6021 else: 6022 if continuations or continuations is None: 6023 if raise_unmatched: 6024 self.raise_error(f"Unknown option {option}") 6025 6026 self._retreat(index) 6027 return None 6028 6029 return exp.var(option) 6030 6031 def _parse_as_command(self, start: Token) -> exp.Command: 6032 while self._curr: 6033 self._advance() 6034 text = self._find_sql(start, self._prev) 6035 size = len(start.text) 6036 self._warn_unsupported() 6037 return exp.Command(this=text[:size], expression=text[size:]) 6038 6039 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6040 settings = [] 6041 6042 self._match_l_paren() 6043 kind = self._parse_id_var() 6044 6045 if self._match(TokenType.L_PAREN): 6046 while True: 6047 key = self._parse_id_var() 6048 value = self._parse_primary() 6049 6050 if not key and value is None: 6051 break 6052 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6053 self._match(TokenType.R_PAREN) 6054 6055 self._match_r_paren() 6056 6057 return self.expression( 6058 exp.DictProperty, 6059 this=this, 6060 kind=kind.this if kind else None, 6061 settings=settings, 6062 ) 6063 6064 def _parse_dict_range(self, this: str) -> exp.DictRange: 6065 
self._match_l_paren() 6066 has_min = self._match_text_seq("MIN") 6067 if has_min: 6068 min = self._parse_var() or self._parse_primary() 6069 self._match_text_seq("MAX") 6070 max = self._parse_var() or self._parse_primary() 6071 else: 6072 max = self._parse_var() or self._parse_primary() 6073 min = exp.Literal.number(0) 6074 self._match_r_paren() 6075 return self.expression(exp.DictRange, this=this, min=min, max=max) 6076 6077 def _parse_comprehension( 6078 self, this: t.Optional[exp.Expression] 6079 ) -> t.Optional[exp.Comprehension]: 6080 index = self._index 6081 expression = self._parse_column() 6082 if not self._match(TokenType.IN): 6083 self._retreat(index - 1) 6084 return None 6085 iterator = self._parse_column() 6086 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6087 return self.expression( 6088 exp.Comprehension, 6089 this=this, 6090 expression=expression, 6091 iterator=iterator, 6092 condition=condition, 6093 ) 6094 6095 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6096 if self._match(TokenType.HEREDOC_STRING): 6097 return self.expression(exp.Heredoc, this=self._prev.text) 6098 6099 if not self._match_text_seq("$"): 6100 return None 6101 6102 tags = ["$"] 6103 tag_text = None 6104 6105 if self._is_connected(): 6106 self._advance() 6107 tags.append(self._prev.text.upper()) 6108 else: 6109 self.raise_error("No closing $ found") 6110 6111 if tags[-1] != "$": 6112 if self._is_connected() and self._match_text_seq("$"): 6113 tag_text = tags[-1] 6114 tags.append("$") 6115 else: 6116 self.raise_error("No closing $ found") 6117 6118 heredoc_start = self._curr 6119 6120 while self._curr: 6121 if self._match_text_seq(*tags, advance=False): 6122 this = self._find_sql(heredoc_start, self._prev) 6123 self._advance(len(tags)) 6124 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6125 6126 self._advance() 6127 6128 self.raise_error(f"No closing {''.join(tags)} found") 6129 return None 6130 6131 def _find_parser( 6132 
    def _match(
        self,
        token_type: TokenType,
        advance: bool = True,
        expression: t.Optional[exp.Expression] = None,
    ) -> t.Optional[bool]:
        # Returns True and (optionally) consumes the current token when it has
        # `token_type`, attaching pending comments to `expression`; None otherwise.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(
        self, types: t.Collection[TokenType], advance: bool = True
    ) -> t.Optional[bool]:
        # Like _match, but accepts any token type in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        # Matches two consecutive token types; consumes both when `advance` is set.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> t.Optional[bool]:
        # Case-insensitive single-token match against a collection of texts.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts: str, advance: bool = True) -> t.Optional[bool]:
        # Case-insensitive, all-or-nothing match of a sequence of token texts.
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            # Peek-only mode: report the match but leave the cursor untouched.
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        # Rewrites Column nodes whose first part names a lambda parameter into
        # plain identifiers (or dotted paths), replacing the outermost Dot chain.
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )
    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_unquoted_field()
            value = None

            # Some options are defined as functions with the values as params
            if not isinstance(option, exp.Func):
                # Different dialects might separate options and values by white space, "=" and "AS"
                self._match(TokenType.EQ)
                self._match(TokenType.ALIAS)

                value = self._parse_unquoted_field()

            param = self.expression(exp.CopyParameter, this=option, expression=value)
            options.append(param)

            if sep:
                self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        def parse_options():
            # Parses `= ( <expr> ... )`; both "=" and the parens are matched leniently.
            opts = []
            self._match(TokenType.EQ)
            self._match(TokenType.L_PAREN)
            while self._curr and not self._match(TokenType.R_PAREN):
                opts.append(self._parse_conjunction())
            return opts

        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", advance=False):
            expr.set("storage", self._parse_conjunction())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string>
            creds = parse_options() if self._match(TokenType.EQ) else self._parse_field()
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", parse_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_conjunction()
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is truthy for COPY ... FROM (load) and falsy for COPY ... TO (unload).
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_conjunction)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
The Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: The desired error level; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of query context to include
                in error messages.
            max_errors: Maximum number of errors to include in a raised ParseError
                (only relevant when error_level is ErrorLevel.RAISE).
            dialect: The dialect (name, class or instance) to parse for.
        """
        # Local import - presumably to avoid a circular dependency: the dialect
        # types are only imported under TYPE_CHECKING at module level.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
1162 def parse( 1163 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1164 ) -> t.List[t.Optional[exp.Expression]]: 1165 """ 1166 Parses a list of tokens and returns a list of syntax trees, one tree 1167 per parsed SQL statement. 1168 1169 Args: 1170 raw_tokens: The list of tokens. 1171 sql: The original SQL string, used to produce helpful debug messages. 1172 1173 Returns: 1174 The list of the produced syntax trees. 1175 """ 1176 return self._parse( 1177 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1178 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this attempt was for, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate type failed - raise with all accumulated errors attached.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1251 def check_errors(self) -> None: 1252 """Logs or raises any found errors, depending on the chosen error level setting.""" 1253 if self.error_level == ErrorLevel.WARN: 1254 for error in self.errors: 1255 logger.error(str(error)) 1256 elif self.error_level == ErrorLevel.RAISE and self.errors: 1257 raise ParseError( 1258 concat_messages(self.errors, self.max_errors), 1259 errors=merge_errors(self.errors), 1260 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Anchor the error at the given token, else the current/previous one.
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # \033[4m ... \033[0m underlines the offending segment in terminals.
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1290 def expression( 1291 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1292 ) -> E: 1293 """ 1294 Creates a new, validated Expression. 1295 1296 Args: 1297 exp_class: The expression class to instantiate. 1298 comments: An optional list of comments to attach to the expression. 1299 kwargs: The arguments to set for the expression along with their respective values. 1300 1301 Returns: 1302 The target expression. 1303 """ 1304 instance = exp_class(**kwargs) 1305 instance.add_comments(comments) if comments else self._add_comments(instance) 1306 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1313 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1314 """ 1315 Validates an Expression, making sure that all its mandatory arguments are set. 1316 1317 Args: 1318 expression: The expression to validate. 1319 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1320 1321 Returns: 1322 The validated expression. 1323 """ 1324 if self.error_level != ErrorLevel.IGNORE: 1325 for error_message in expression.error_messages(args): 1326 self.raise_error(error_message) 1327 1328 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.