Edit on GitHub

sqlglot.parser

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from collections import defaultdict
   6
   7from sqlglot import exp
   8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   9from sqlglot.helper import apply_index_offset, ensure_list, seq_get
  10from sqlglot.time import format_time
  11from sqlglot.tokens import Token, Tokenizer, TokenType
  12from sqlglot.trie import TrieResult, in_trie, new_trie
  13
  14if t.TYPE_CHECKING:
  15    from sqlglot._typing import E, Lit
  16    from sqlglot.dialects.dialect import Dialect, DialectType
  17
  18    T = t.TypeVar("T")
  19
logger = logging.getLogger("sqlglot")

# Type alias for keyword-option tables: maps an option name to the values that
# may follow it, where each value is either a single keyword or a sequence of
# keywords forming a multi-word phrase. Presumably consumed by the parser's
# option-matching helpers (e.g. _parse_var_from_options) — TODO confirm.
OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]
  23
  24
  25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
  26    if len(args) == 1 and args[0].is_star:
  27        return exp.StarMap(this=args[0])
  28
  29    keys = []
  30    values = []
  31    for i in range(0, len(args), 2):
  32        keys.append(args[i])
  33        values.append(args[i + 1])
  34
  35    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
  36
  37
  38def build_like(args: t.List) -> exp.Escape | exp.Like:
  39    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
  40    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
  41
  42
  43def binary_range_parser(
  44    expr_type: t.Type[exp.Expression], reverse_args: bool = False
  45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
  46    def _parse_binary_range(
  47        self: Parser, this: t.Optional[exp.Expression]
  48    ) -> t.Optional[exp.Expression]:
  49        expression = self._parse_bitwise()
  50        if reverse_args:
  51            this, expression = expression, this
  52        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))
  53
  54    return _parse_binary_range
  55
  56
  57def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
  58    # Default argument order is base, expression
  59    this = seq_get(args, 0)
  60    expression = seq_get(args, 1)
  61
  62    if expression:
  63        if not dialect.LOG_BASE_FIRST:
  64            this, expression = expression, this
  65        return exp.Log(this=this, expression=expression)
  66
  67    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
  68
  69
  70def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
  71    arg = seq_get(args, 0)
  72    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)
  73
  74
  75def build_lower(args: t.List) -> exp.Lower | exp.Hex:
  76    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
  77    arg = seq_get(args, 0)
  78    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)
  79
  80
  81def build_upper(args: t.List) -> exp.Upper | exp.Hex:
  82    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
  83    arg = seq_get(args, 0)
  84    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)
  85
  86
  87def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
  88    def _builder(args: t.List, dialect: Dialect) -> E:
  89        expression = expr_type(
  90            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
  91        )
  92        if len(args) > 2 and expr_type is exp.JSONExtract:
  93            expression.set("expressions", args[2:])
  94
  95        return expression
  96
  97    return _builder
  98
  99
 100def build_mod(args: t.List) -> exp.Mod:
 101    this = seq_get(args, 0)
 102    expression = seq_get(args, 1)
 103
 104    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
 105    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
 106    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression
 107
 108    return exp.Mod(this=this, expression=expression)
 109
 110
 111def build_pad(args: t.List, is_left: bool = True):
 112    return exp.Pad(
 113        this=seq_get(args, 0),
 114        expression=seq_get(args, 1),
 115        fill_pattern=seq_get(args, 2),
 116        is_left=is_left,
 117    )
 118
 119
 120class _Parser(type):
 121    def __new__(cls, clsname, bases, attrs):
 122        klass = super().__new__(cls, clsname, bases, attrs)
 123
 124        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
 125        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)
 126
 127        return klass
 128
 129
 130class Parser(metaclass=_Parser):
 131    """
 132    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
 133
 134    Args:
 135        error_level: The desired error level.
 136            Default: ErrorLevel.IMMEDIATE
 137        error_message_context: The amount of context to capture from a query string when displaying
 138            the error message (in number of characters).
 139            Default: 100
 140        max_errors: Maximum number of error messages to include in a raised ParseError.
 141            This is only relevant if error_level is ErrorLevel.RAISE.
 142            Default: 3
 143    """
 144
 145    FUNCTIONS: t.Dict[str, t.Callable] = {
 146        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
 147        "CONCAT": lambda args, dialect: exp.Concat(
 148            expressions=args,
 149            safe=not dialect.STRICT_STRING_CONCAT,
 150            coalesce=dialect.CONCAT_COALESCE,
 151        ),
 152        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
 153            expressions=args,
 154            safe=not dialect.STRICT_STRING_CONCAT,
 155            coalesce=dialect.CONCAT_COALESCE,
 156        ),
 157        "DATE_TO_DATE_STR": lambda args: exp.Cast(
 158            this=seq_get(args, 0),
 159            to=exp.DataType(this=exp.DataType.Type.TEXT),
 160        ),
 161        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
 162        "HEX": build_hex,
 163        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
 164        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
 165        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
 166        "LIKE": build_like,
 167        "LOG": build_logarithm,
 168        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
 169        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
 170        "LOWER": build_lower,
 171        "LPAD": lambda args: build_pad(args),
 172        "LEFTPAD": lambda args: build_pad(args),
 173        "MOD": build_mod,
 174        "RPAD": lambda args: build_pad(args, is_left=False),
 175        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
 176        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
 177        if len(args) != 2
 178        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
 179        "TIME_TO_TIME_STR": lambda args: exp.Cast(
 180            this=seq_get(args, 0),
 181            to=exp.DataType(this=exp.DataType.Type.TEXT),
 182        ),
 183        "TO_HEX": build_hex,
 184        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
 185            this=exp.Cast(
 186                this=seq_get(args, 0),
 187                to=exp.DataType(this=exp.DataType.Type.TEXT),
 188            ),
 189            start=exp.Literal.number(1),
 190            length=exp.Literal.number(10),
 191        ),
 192        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
 193        "UPPER": build_upper,
 194        "VAR_MAP": build_var_map,
 195    }
 196
 197    NO_PAREN_FUNCTIONS = {
 198        TokenType.CURRENT_DATE: exp.CurrentDate,
 199        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 200        TokenType.CURRENT_TIME: exp.CurrentTime,
 201        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 202        TokenType.CURRENT_USER: exp.CurrentUser,
 203    }
 204
 205    STRUCT_TYPE_TOKENS = {
 206        TokenType.NESTED,
 207        TokenType.OBJECT,
 208        TokenType.STRUCT,
 209    }
 210
 211    NESTED_TYPE_TOKENS = {
 212        TokenType.ARRAY,
 213        TokenType.LIST,
 214        TokenType.LOWCARDINALITY,
 215        TokenType.MAP,
 216        TokenType.NULLABLE,
 217        *STRUCT_TYPE_TOKENS,
 218    }
 219
 220    ENUM_TYPE_TOKENS = {
 221        TokenType.ENUM,
 222        TokenType.ENUM8,
 223        TokenType.ENUM16,
 224    }
 225
 226    AGGREGATE_TYPE_TOKENS = {
 227        TokenType.AGGREGATEFUNCTION,
 228        TokenType.SIMPLEAGGREGATEFUNCTION,
 229    }
 230
 231    TYPE_TOKENS = {
 232        TokenType.BIT,
 233        TokenType.BOOLEAN,
 234        TokenType.TINYINT,
 235        TokenType.UTINYINT,
 236        TokenType.SMALLINT,
 237        TokenType.USMALLINT,
 238        TokenType.INT,
 239        TokenType.UINT,
 240        TokenType.BIGINT,
 241        TokenType.UBIGINT,
 242        TokenType.INT128,
 243        TokenType.UINT128,
 244        TokenType.INT256,
 245        TokenType.UINT256,
 246        TokenType.MEDIUMINT,
 247        TokenType.UMEDIUMINT,
 248        TokenType.FIXEDSTRING,
 249        TokenType.FLOAT,
 250        TokenType.DOUBLE,
 251        TokenType.CHAR,
 252        TokenType.NCHAR,
 253        TokenType.VARCHAR,
 254        TokenType.NVARCHAR,
 255        TokenType.BPCHAR,
 256        TokenType.TEXT,
 257        TokenType.MEDIUMTEXT,
 258        TokenType.LONGTEXT,
 259        TokenType.MEDIUMBLOB,
 260        TokenType.LONGBLOB,
 261        TokenType.BINARY,
 262        TokenType.VARBINARY,
 263        TokenType.JSON,
 264        TokenType.JSONB,
 265        TokenType.INTERVAL,
 266        TokenType.TINYBLOB,
 267        TokenType.TINYTEXT,
 268        TokenType.TIME,
 269        TokenType.TIMETZ,
 270        TokenType.TIMESTAMP,
 271        TokenType.TIMESTAMP_S,
 272        TokenType.TIMESTAMP_MS,
 273        TokenType.TIMESTAMP_NS,
 274        TokenType.TIMESTAMPTZ,
 275        TokenType.TIMESTAMPLTZ,
 276        TokenType.TIMESTAMPNTZ,
 277        TokenType.DATETIME,
 278        TokenType.DATETIME64,
 279        TokenType.DATE,
 280        TokenType.DATE32,
 281        TokenType.INT4RANGE,
 282        TokenType.INT4MULTIRANGE,
 283        TokenType.INT8RANGE,
 284        TokenType.INT8MULTIRANGE,
 285        TokenType.NUMRANGE,
 286        TokenType.NUMMULTIRANGE,
 287        TokenType.TSRANGE,
 288        TokenType.TSMULTIRANGE,
 289        TokenType.TSTZRANGE,
 290        TokenType.TSTZMULTIRANGE,
 291        TokenType.DATERANGE,
 292        TokenType.DATEMULTIRANGE,
 293        TokenType.DECIMAL,
 294        TokenType.UDECIMAL,
 295        TokenType.BIGDECIMAL,
 296        TokenType.UUID,
 297        TokenType.GEOGRAPHY,
 298        TokenType.GEOMETRY,
 299        TokenType.HLLSKETCH,
 300        TokenType.HSTORE,
 301        TokenType.PSEUDO_TYPE,
 302        TokenType.SUPER,
 303        TokenType.SERIAL,
 304        TokenType.SMALLSERIAL,
 305        TokenType.BIGSERIAL,
 306        TokenType.XML,
 307        TokenType.YEAR,
 308        TokenType.UNIQUEIDENTIFIER,
 309        TokenType.USERDEFINED,
 310        TokenType.MONEY,
 311        TokenType.SMALLMONEY,
 312        TokenType.ROWVERSION,
 313        TokenType.IMAGE,
 314        TokenType.VARIANT,
 315        TokenType.VECTOR,
 316        TokenType.OBJECT,
 317        TokenType.OBJECT_IDENTIFIER,
 318        TokenType.INET,
 319        TokenType.IPADDRESS,
 320        TokenType.IPPREFIX,
 321        TokenType.IPV4,
 322        TokenType.IPV6,
 323        TokenType.UNKNOWN,
 324        TokenType.NULL,
 325        TokenType.NAME,
 326        TokenType.TDIGEST,
 327        *ENUM_TYPE_TOKENS,
 328        *NESTED_TYPE_TOKENS,
 329        *AGGREGATE_TYPE_TOKENS,
 330    }
 331
 332    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
 333        TokenType.BIGINT: TokenType.UBIGINT,
 334        TokenType.INT: TokenType.UINT,
 335        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
 336        TokenType.SMALLINT: TokenType.USMALLINT,
 337        TokenType.TINYINT: TokenType.UTINYINT,
 338        TokenType.DECIMAL: TokenType.UDECIMAL,
 339    }
 340
 341    SUBQUERY_PREDICATES = {
 342        TokenType.ANY: exp.Any,
 343        TokenType.ALL: exp.All,
 344        TokenType.EXISTS: exp.Exists,
 345        TokenType.SOME: exp.Any,
 346    }
 347
 348    RESERVED_TOKENS = {
 349        *Tokenizer.SINGLE_TOKENS.values(),
 350        TokenType.SELECT,
 351    } - {TokenType.IDENTIFIER}
 352
 353    DB_CREATABLES = {
 354        TokenType.DATABASE,
 355        TokenType.DICTIONARY,
 356        TokenType.MODEL,
 357        TokenType.SCHEMA,
 358        TokenType.SEQUENCE,
 359        TokenType.STORAGE_INTEGRATION,
 360        TokenType.TABLE,
 361        TokenType.TAG,
 362        TokenType.VIEW,
 363        TokenType.WAREHOUSE,
 364        TokenType.STREAMLIT,
 365    }
 366
 367    CREATABLES = {
 368        TokenType.COLUMN,
 369        TokenType.CONSTRAINT,
 370        TokenType.FOREIGN_KEY,
 371        TokenType.FUNCTION,
 372        TokenType.INDEX,
 373        TokenType.PROCEDURE,
 374        *DB_CREATABLES,
 375    }
 376
 377    # Tokens that can represent identifiers
 378    ID_VAR_TOKENS = {
 379        TokenType.ALL,
 380        TokenType.VAR,
 381        TokenType.ANTI,
 382        TokenType.APPLY,
 383        TokenType.ASC,
 384        TokenType.ASOF,
 385        TokenType.AUTO_INCREMENT,
 386        TokenType.BEGIN,
 387        TokenType.BPCHAR,
 388        TokenType.CACHE,
 389        TokenType.CASE,
 390        TokenType.COLLATE,
 391        TokenType.COMMAND,
 392        TokenType.COMMENT,
 393        TokenType.COMMIT,
 394        TokenType.CONSTRAINT,
 395        TokenType.COPY,
 396        TokenType.DEFAULT,
 397        TokenType.DELETE,
 398        TokenType.DESC,
 399        TokenType.DESCRIBE,
 400        TokenType.DICTIONARY,
 401        TokenType.DIV,
 402        TokenType.END,
 403        TokenType.EXECUTE,
 404        TokenType.ESCAPE,
 405        TokenType.FALSE,
 406        TokenType.FIRST,
 407        TokenType.FILTER,
 408        TokenType.FINAL,
 409        TokenType.FORMAT,
 410        TokenType.FULL,
 411        TokenType.IDENTIFIER,
 412        TokenType.IS,
 413        TokenType.ISNULL,
 414        TokenType.INTERVAL,
 415        TokenType.KEEP,
 416        TokenType.KILL,
 417        TokenType.LEFT,
 418        TokenType.LOAD,
 419        TokenType.MERGE,
 420        TokenType.NATURAL,
 421        TokenType.NEXT,
 422        TokenType.OFFSET,
 423        TokenType.OPERATOR,
 424        TokenType.ORDINALITY,
 425        TokenType.OVERLAPS,
 426        TokenType.OVERWRITE,
 427        TokenType.PARTITION,
 428        TokenType.PERCENT,
 429        TokenType.PIVOT,
 430        TokenType.PRAGMA,
 431        TokenType.RANGE,
 432        TokenType.RECURSIVE,
 433        TokenType.REFERENCES,
 434        TokenType.REFRESH,
 435        TokenType.REPLACE,
 436        TokenType.RIGHT,
 437        TokenType.ROLLUP,
 438        TokenType.ROW,
 439        TokenType.ROWS,
 440        TokenType.SEMI,
 441        TokenType.SET,
 442        TokenType.SETTINGS,
 443        TokenType.SHOW,
 444        TokenType.TEMPORARY,
 445        TokenType.TOP,
 446        TokenType.TRUE,
 447        TokenType.TRUNCATE,
 448        TokenType.UNIQUE,
 449        TokenType.UNNEST,
 450        TokenType.UNPIVOT,
 451        TokenType.UPDATE,
 452        TokenType.USE,
 453        TokenType.VOLATILE,
 454        TokenType.WINDOW,
 455        *CREATABLES,
 456        *SUBQUERY_PREDICATES,
 457        *TYPE_TOKENS,
 458        *NO_PAREN_FUNCTIONS,
 459    }
 460
 461    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 462
 463    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 464        TokenType.ANTI,
 465        TokenType.APPLY,
 466        TokenType.ASOF,
 467        TokenType.FULL,
 468        TokenType.LEFT,
 469        TokenType.LOCK,
 470        TokenType.NATURAL,
 471        TokenType.OFFSET,
 472        TokenType.RIGHT,
 473        TokenType.SEMI,
 474        TokenType.WINDOW,
 475    }
 476
 477    ALIAS_TOKENS = ID_VAR_TOKENS
 478
 479    ARRAY_CONSTRUCTORS = {
 480        "ARRAY": exp.Array,
 481        "LIST": exp.List,
 482    }
 483
 484    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 485
 486    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 487
 488    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
 489
 490    FUNC_TOKENS = {
 491        TokenType.COLLATE,
 492        TokenType.COMMAND,
 493        TokenType.CURRENT_DATE,
 494        TokenType.CURRENT_DATETIME,
 495        TokenType.CURRENT_TIMESTAMP,
 496        TokenType.CURRENT_TIME,
 497        TokenType.CURRENT_USER,
 498        TokenType.FILTER,
 499        TokenType.FIRST,
 500        TokenType.FORMAT,
 501        TokenType.GLOB,
 502        TokenType.IDENTIFIER,
 503        TokenType.INDEX,
 504        TokenType.ISNULL,
 505        TokenType.ILIKE,
 506        TokenType.INSERT,
 507        TokenType.LIKE,
 508        TokenType.MERGE,
 509        TokenType.OFFSET,
 510        TokenType.PRIMARY_KEY,
 511        TokenType.RANGE,
 512        TokenType.REPLACE,
 513        TokenType.RLIKE,
 514        TokenType.ROW,
 515        TokenType.UNNEST,
 516        TokenType.VAR,
 517        TokenType.LEFT,
 518        TokenType.RIGHT,
 519        TokenType.SEQUENCE,
 520        TokenType.DATE,
 521        TokenType.DATETIME,
 522        TokenType.TABLE,
 523        TokenType.TIMESTAMP,
 524        TokenType.TIMESTAMPTZ,
 525        TokenType.TRUNCATE,
 526        TokenType.WINDOW,
 527        TokenType.XOR,
 528        *TYPE_TOKENS,
 529        *SUBQUERY_PREDICATES,
 530    }
 531
 532    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
 533        TokenType.AND: exp.And,
 534    }
 535
 536    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
 537        TokenType.COLON_EQ: exp.PropertyEQ,
 538    }
 539
 540    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
 541        TokenType.OR: exp.Or,
 542    }
 543
 544    EQUALITY = {
 545        TokenType.EQ: exp.EQ,
 546        TokenType.NEQ: exp.NEQ,
 547        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 548    }
 549
 550    COMPARISON = {
 551        TokenType.GT: exp.GT,
 552        TokenType.GTE: exp.GTE,
 553        TokenType.LT: exp.LT,
 554        TokenType.LTE: exp.LTE,
 555    }
 556
 557    BITWISE = {
 558        TokenType.AMP: exp.BitwiseAnd,
 559        TokenType.CARET: exp.BitwiseXor,
 560        TokenType.PIPE: exp.BitwiseOr,
 561    }
 562
 563    TERM = {
 564        TokenType.DASH: exp.Sub,
 565        TokenType.PLUS: exp.Add,
 566        TokenType.MOD: exp.Mod,
 567        TokenType.COLLATE: exp.Collate,
 568    }
 569
 570    FACTOR = {
 571        TokenType.DIV: exp.IntDiv,
 572        TokenType.LR_ARROW: exp.Distance,
 573        TokenType.SLASH: exp.Div,
 574        TokenType.STAR: exp.Mul,
 575    }
 576
 577    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
 578
 579    TIMES = {
 580        TokenType.TIME,
 581        TokenType.TIMETZ,
 582    }
 583
 584    TIMESTAMPS = {
 585        TokenType.TIMESTAMP,
 586        TokenType.TIMESTAMPTZ,
 587        TokenType.TIMESTAMPLTZ,
 588        *TIMES,
 589    }
 590
 591    SET_OPERATIONS = {
 592        TokenType.UNION,
 593        TokenType.INTERSECT,
 594        TokenType.EXCEPT,
 595    }
 596
 597    JOIN_METHODS = {
 598        TokenType.ASOF,
 599        TokenType.NATURAL,
 600        TokenType.POSITIONAL,
 601    }
 602
 603    JOIN_SIDES = {
 604        TokenType.LEFT,
 605        TokenType.RIGHT,
 606        TokenType.FULL,
 607    }
 608
 609    JOIN_KINDS = {
 610        TokenType.ANTI,
 611        TokenType.CROSS,
 612        TokenType.INNER,
 613        TokenType.OUTER,
 614        TokenType.SEMI,
 615        TokenType.STRAIGHT_JOIN,
 616    }
 617
 618    JOIN_HINTS: t.Set[str] = set()
 619
 620    LAMBDAS = {
 621        TokenType.ARROW: lambda self, expressions: self.expression(
 622            exp.Lambda,
 623            this=self._replace_lambda(
 624                self._parse_assignment(),
 625                expressions,
 626            ),
 627            expressions=expressions,
 628        ),
 629        TokenType.FARROW: lambda self, expressions: self.expression(
 630            exp.Kwarg,
 631            this=exp.var(expressions[0].name),
 632            expression=self._parse_assignment(),
 633        ),
 634    }
 635
 636    COLUMN_OPERATORS = {
 637        TokenType.DOT: None,
 638        TokenType.DCOLON: lambda self, this, to: self.expression(
 639            exp.Cast if self.STRICT_CAST else exp.TryCast,
 640            this=this,
 641            to=to,
 642        ),
 643        TokenType.ARROW: lambda self, this, path: self.expression(
 644            exp.JSONExtract,
 645            this=this,
 646            expression=self.dialect.to_json_path(path),
 647            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
 648        ),
 649        TokenType.DARROW: lambda self, this, path: self.expression(
 650            exp.JSONExtractScalar,
 651            this=this,
 652            expression=self.dialect.to_json_path(path),
 653            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
 654        ),
 655        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 656            exp.JSONBExtract,
 657            this=this,
 658            expression=path,
 659        ),
 660        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 661            exp.JSONBExtractScalar,
 662            this=this,
 663            expression=path,
 664        ),
 665        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 666            exp.JSONBContains,
 667            this=this,
 668            expression=key,
 669        ),
 670    }
 671
 672    EXPRESSION_PARSERS = {
 673        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
 674        exp.Column: lambda self: self._parse_column(),
 675        exp.Condition: lambda self: self._parse_assignment(),
 676        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
 677        exp.Expression: lambda self: self._parse_expression(),
 678        exp.From: lambda self: self._parse_from(joins=True),
 679        exp.Group: lambda self: self._parse_group(),
 680        exp.Having: lambda self: self._parse_having(),
 681        exp.Identifier: lambda self: self._parse_id_var(),
 682        exp.Join: lambda self: self._parse_join(),
 683        exp.Lambda: lambda self: self._parse_lambda(),
 684        exp.Lateral: lambda self: self._parse_lateral(),
 685        exp.Limit: lambda self: self._parse_limit(),
 686        exp.Offset: lambda self: self._parse_offset(),
 687        exp.Order: lambda self: self._parse_order(),
 688        exp.Ordered: lambda self: self._parse_ordered(),
 689        exp.Properties: lambda self: self._parse_properties(),
 690        exp.Qualify: lambda self: self._parse_qualify(),
 691        exp.Returning: lambda self: self._parse_returning(),
 692        exp.Select: lambda self: self._parse_select(),
 693        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
 694        exp.Table: lambda self: self._parse_table_parts(),
 695        exp.TableAlias: lambda self: self._parse_table_alias(),
 696        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
 697        exp.Where: lambda self: self._parse_where(),
 698        exp.Window: lambda self: self._parse_named_window(),
 699        exp.With: lambda self: self._parse_with(),
 700        "JOIN_TYPE": lambda self: self._parse_join_parts(),
 701    }
 702
 703    STATEMENT_PARSERS = {
 704        TokenType.ALTER: lambda self: self._parse_alter(),
 705        TokenType.BEGIN: lambda self: self._parse_transaction(),
 706        TokenType.CACHE: lambda self: self._parse_cache(),
 707        TokenType.COMMENT: lambda self: self._parse_comment(),
 708        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 709        TokenType.COPY: lambda self: self._parse_copy(),
 710        TokenType.CREATE: lambda self: self._parse_create(),
 711        TokenType.DELETE: lambda self: self._parse_delete(),
 712        TokenType.DESC: lambda self: self._parse_describe(),
 713        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 714        TokenType.DROP: lambda self: self._parse_drop(),
 715        TokenType.INSERT: lambda self: self._parse_insert(),
 716        TokenType.KILL: lambda self: self._parse_kill(),
 717        TokenType.LOAD: lambda self: self._parse_load(),
 718        TokenType.MERGE: lambda self: self._parse_merge(),
 719        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
 720        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 721        TokenType.REFRESH: lambda self: self._parse_refresh(),
 722        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 723        TokenType.SET: lambda self: self._parse_set(),
 724        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
 725        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 726        TokenType.UPDATE: lambda self: self._parse_update(),
 727        TokenType.USE: lambda self: self.expression(
 728            exp.Use,
 729            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
 730            this=self._parse_table(schema=False),
 731        ),
 732        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
 733    }
 734
 735    UNARY_PARSERS = {
 736        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 737        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 738        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 739        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 740        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
 741        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
 742    }
 743
 744    STRING_PARSERS = {
 745        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
 746            exp.RawString, this=token.text
 747        ),
 748        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
 749            exp.National, this=token.text
 750        ),
 751        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
 752        TokenType.STRING: lambda self, token: self.expression(
 753            exp.Literal, this=token.text, is_string=True
 754        ),
 755        TokenType.UNICODE_STRING: lambda self, token: self.expression(
 756            exp.UnicodeString,
 757            this=token.text,
 758            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
 759        ),
 760    }
 761
 762    NUMERIC_PARSERS = {
 763        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 764        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 765        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 766        TokenType.NUMBER: lambda self, token: self.expression(
 767            exp.Literal, this=token.text, is_string=False
 768        ),
 769    }
 770
 771    PRIMARY_PARSERS = {
 772        **STRING_PARSERS,
 773        **NUMERIC_PARSERS,
 774        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 775        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 776        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 777        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 778        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 779        TokenType.STAR: lambda self, _: self.expression(
 780            exp.Star,
 781            **{
 782                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
 783                "replace": self._parse_star_op("REPLACE"),
 784                "rename": self._parse_star_op("RENAME"),
 785            },
 786        ),
 787    }
 788
 789    PLACEHOLDER_PARSERS = {
 790        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 791        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 792        TokenType.COLON: lambda self: (
 793            self.expression(exp.Placeholder, this=self._prev.text)
 794            if self._match_set(self.ID_VAR_TOKENS)
 795            else None
 796        ),
 797    }
 798
 799    RANGE_PARSERS = {
 800        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 801        TokenType.GLOB: binary_range_parser(exp.Glob),
 802        TokenType.ILIKE: binary_range_parser(exp.ILike),
 803        TokenType.IN: lambda self, this: self._parse_in(this),
 804        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 805        TokenType.IS: lambda self, this: self._parse_is(this),
 806        TokenType.LIKE: binary_range_parser(exp.Like),
 807        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 808        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 809        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 810        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
 811    }
 812
    # Parsers for DDL/table property keywords: each entry maps the property's
    # leading keyword to a callable that consumes the tokens following it and
    # returns the parsed property expression.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        # DETERMINISTIC is treated as a synonym of IMMUTABLE stability.
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
 919
    # Parsers for column/table constraint keywords, keyed by the keyword that
    # introduces the constraint in a column definition.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> is a constraint; a bare ON <id> becomes OnProperty.
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
 985
    # Handlers for the action keyword following ALTER TABLE.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    # Handlers for the keyword following ALTER TABLE ... ALTER
    # (distribution/sort key alterations).
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraint keywords that may appear in a schema definition without being
    # preceded by CONSTRAINT <name>.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    # Function-like keywords that are parsed without requiring parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that can never serve as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    # Functions whose arguments may be given aliases (e.g. STRUCT(x AS a)).
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Expression types that represent a key/value-style definition.
    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
1031
    # Functions whose argument lists need bespoke parsing (instead of a plain
    # comma-separated expression list), keyed by function name.
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        # SAFE_CAST / TRY_CAST / TRY_CONVERT are the non-strict variants.
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
1052
    # Query modifier clauses keyed by their introducing token. Each handler
    # returns a (modifier_arg_name, parsed_expression) pair.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        # FETCH is parsed as a LIMIT equivalent.
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
1081
    # Handlers for the keyword that follows SET.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW statement parsers; empty here — presumably filled in by subclasses.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Post-processing applied to literals of a given type (this, expression).
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Per-type DataType rewrites; empty here — presumably filled in by subclasses.
    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    # Tokens that can start the SELECT part of a DDL statement (e.g. CTAS).
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that may precede a VOLATILE keyword.
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}
1100
1101    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
1102    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
1103        "ISOLATION": (
1104            ("LEVEL", "REPEATABLE", "READ"),
1105            ("LEVEL", "READ", "COMMITTED"),
1106            ("LEVEL", "READ", "UNCOMITTED"),
1107            ("LEVEL", "SERIALIZABLE"),
1108        ),
1109        "READ": ("WRITE", "ONLY"),
1110    }
1111
    # Actions accepted after ON CONFLICT / OR in INSERT statements.
    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    # Option keywords accepted in CREATE SEQUENCE statements.
    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    # Options for WITH [NO] ISOLATED LOADING.
    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    # Object kinds accepted by the USE statement.
    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    # Actions accepted in ALTER ... CAST clauses.
    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    # Options accepted in schema binding clauses (e.g. WITH SCHEMA EVOLUTION).
    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    # Trailing options accepted after a key constraint definition.
    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    # Keywords accepted after INSERT OR.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Keywords that introduce a table clone clause in CREATE statements.
    CLONE_KEYWORDS = {"CLONE", "COPY"}
    # Keywords used by historical-data (time travel) clauses.
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    # Keywords that may follow an operator class in an index definition.
    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    # Tokens that may follow an operator type.
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}
1174
    # Tokens that introduce a table index hint (FORCE/IGNORE/USE INDEX).
    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    # Attributes accepted in CREATE VIEW ... WITH <attribute>.
    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    # Tokens usable as a window alias (ROWS excluded to avoid ambiguity with frames).
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    # Tokens that can separate a key from a value in JSON constructors.
    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    # Tokens usable as identifiers inside a FETCH clause.
    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    # Tokens that may follow ALTER TABLE ... ADD when adding a constraint.
    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    # Tokens usable as the OFFSET alias in an UNNEST clause.
    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Tokens that can start a SELECT query.
    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    # COPY INTO options that take a variable-length (parenthesized) value.
    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    # Whether CAST/CONVERT are parsed in strict mode (passed to _parse_cast).
    STRICT_CAST = True

    # Pivot-parsing behavior flags — dialect-specific; semantics defined where
    # they are consumed (pivot parsing), not here.
    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether single-argument LOG defaults to the natural logarithm —
    # dialect-specific; consumed by the LOG function parser.
    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False
1256
    # Per-instance parser state; __slots__ avoids a per-instance __dict__.
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
1275
1276    def __init__(
1277        self,
1278        error_level: t.Optional[ErrorLevel] = None,
1279        error_message_context: int = 100,
1280        max_errors: int = 3,
1281        dialect: DialectType = None,
1282    ):
1283        from sqlglot.dialects import Dialect
1284
1285        self.error_level = error_level or ErrorLevel.IMMEDIATE
1286        self.error_message_context = error_message_context
1287        self.max_errors = max_errors
1288        self.dialect = Dialect.get_or_raise(dialect)
1289        self.reset()
1290
1291    def reset(self):
1292        self.sql = ""
1293        self.errors = []
1294        self._tokens = []
1295        self._index = 0
1296        self._curr = None
1297        self._next = None
1298        self._prev = None
1299        self._prev_comments = None
1300
1301    def parse(
1302        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
1303    ) -> t.List[t.Optional[exp.Expression]]:
1304        """
1305        Parses a list of tokens and returns a list of syntax trees, one tree
1306        per parsed SQL statement.
1307
1308        Args:
1309            raw_tokens: The list of tokens.
1310            sql: The original SQL string, used to produce helpful debug messages.
1311
1312        Returns:
1313            The list of the produced syntax trees.
1314        """
1315        return self._parse(
1316            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
1317        )
1318
1319    def parse_into(
1320        self,
1321        expression_types: exp.IntoType,
1322        raw_tokens: t.List[Token],
1323        sql: t.Optional[str] = None,
1324    ) -> t.List[t.Optional[exp.Expression]]:
1325        """
1326        Parses a list of tokens into a given Expression type. If a collection of Expression
1327        types is given instead, this method will try to parse the token list into each one
1328        of them, stopping at the first for which the parsing succeeds.
1329
1330        Args:
1331            expression_types: The expression type(s) to try and parse the token list into.
1332            raw_tokens: The list of tokens.
1333            sql: The original SQL string, used to produce helpful debug messages.
1334
1335        Returns:
1336            The target Expression.
1337        """
1338        errors = []
1339        for expression_type in ensure_list(expression_types):
1340            parser = self.EXPRESSION_PARSERS.get(expression_type)
1341            if not parser:
1342                raise TypeError(f"No parser registered for {expression_type}")
1343
1344            try:
1345                return self._parse(parser, raw_tokens, sql)
1346            except ParseError as e:
1347                e.errors[0]["into_expression"] = expression_type
1348                errors.append(e)
1349
1350        raise ParseError(
1351            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
1352            errors=merge_errors(errors),
1353        ) from errors[-1]
1354
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Split `raw_tokens` into semicolon-delimited chunks and apply
        `parse_method` to each chunk, returning one expression per chunk.
        """
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A semicolon that carries comments becomes its own chunk so
                # those comments are not lost.
                if token.comments:
                    chunks.append([token])

                # Only open a new chunk if this isn't the trailing semicolon.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
1392
1393    def check_errors(self) -> None:
1394        """Logs or raises any found errors, depending on the chosen error level setting."""
1395        if self.error_level == ErrorLevel.WARN:
1396            for error in self.errors:
1397                logger.error(str(error))
1398        elif self.error_level == ErrorLevel.RAISE and self.errors:
1399            raise ParseError(
1400                concat_messages(self.errors, self.max_errors),
1401                errors=merge_errors(self.errors),
1402            )
1403
1404    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
1405        """
1406        Appends an error in the list of recorded errors or raises it, depending on the chosen
1407        error level setting.
1408        """
1409        token = token or self._curr or self._prev or Token.string("")
1410        start = token.start
1411        end = token.end + 1
1412        start_context = self.sql[max(start - self.error_message_context, 0) : start]
1413        highlight = self.sql[start:end]
1414        end_context = self.sql[end : end + self.error_message_context]
1415
1416        error = ParseError.new(
1417            f"{message}. Line {token.line}, Col: {token.col}.\n"
1418            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
1419            description=message,
1420            line=token.line,
1421            col=token.col,
1422            start_context=start_context,
1423            highlight=highlight,
1424            end_context=end_context,
1425        )
1426
1427        if self.error_level == ErrorLevel.IMMEDIATE:
1428            raise error
1429
1430        self.errors.append(error)
1431
1432    def expression(
1433        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
1434    ) -> E:
1435        """
1436        Creates a new, validated Expression.
1437
1438        Args:
1439            exp_class: The expression class to instantiate.
1440            comments: An optional list of comments to attach to the expression.
1441            kwargs: The arguments to set for the expression along with their respective values.
1442
1443        Returns:
1444            The target expression.
1445        """
1446        instance = exp_class(**kwargs)
1447        instance.add_comments(comments) if comments else self._add_comments(instance)
1448        return self.validate_expression(instance)
1449
1450    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
1451        if expression and self._prev_comments:
1452            expression.add_comments(self._prev_comments)
1453            self._prev_comments = None
1454
1455    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
1456        """
1457        Validates an Expression, making sure that all its mandatory arguments are set.
1458
1459        Args:
1460            expression: The expression to validate.
1461            args: An optional list of items that was used to instantiate the expression, if it's a Func.
1462
1463        Returns:
1464            The validated expression.
1465        """
1466        if self.error_level != ErrorLevel.IGNORE:
1467            for error_message in expression.error_messages(args):
1468                self.raise_error(error_message)
1469
1470        return expression
1471
1472    def _find_sql(self, start: Token, end: Token) -> str:
1473        return self.sql[start.start : end.end + 1]
1474
1475    def _is_connected(self) -> bool:
1476        return self._prev and self._curr and self._prev.end + 1 == self._curr.start
1477
1478    def _advance(self, times: int = 1) -> None:
1479        self._index += times
1480        self._curr = seq_get(self._tokens, self._index)
1481        self._next = seq_get(self._tokens, self._index + 1)
1482
1483        if self._index > 0:
1484            self._prev = self._tokens[self._index - 1]
1485            self._prev_comments = self._prev.comments
1486        else:
1487            self._prev = None
1488            self._prev_comments = None
1489
1490    def _retreat(self, index: int) -> None:
1491        if index != self._index:
1492            self._advance(index - self._index)
1493
1494    def _warn_unsupported(self) -> None:
1495        if len(self._tokens) <= 1:
1496            return
1497
1498        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
1499        # interested in emitting a warning for the one being currently processed.
1500        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]
1501
1502        logger.warning(
1503            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
1504        )
1505
1506    def _parse_command(self) -> exp.Command:
1507        self._warn_unsupported()
1508        return self.expression(
1509            exp.Command,
1510            comments=self._prev_comments,
1511            this=self._prev.text.upper(),
1512            expression=self._parse_string(),
1513        )
1514
1515    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
1516        """
1517        Attemps to backtrack if a parse function that contains a try/catch internally raises an error.
1518        This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to
1519        solve this by setting & resetting the parser state accordingly
1520        """
1521        index = self._index
1522        error_level = self.error_level
1523
1524        self.error_level = ErrorLevel.IMMEDIATE
1525        try:
1526            this = parse_method()
1527        except ParseError:
1528            this = None
1529        finally:
1530            if not this or retreat:
1531                self._retreat(index)
1532            self.error_level = error_level
1533
1534        return this
1535
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse a COMMENT ON <kind> <name> IS <string> statement into exp.Comment."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unrecognized object kind: fall back to parsing as a raw command.
            return self._parse_as_command(start)

        # The object-name parser depends on the kind of object being commented.
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )
1566
1567    def _parse_to_table(
1568        self,
1569    ) -> exp.ToTableProperty:
1570        table = self._parse_table_parts(schema=True)
1571        return self.expression(exp.ToTableProperty, this=table)
1572
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL expression optionally followed by an action keyword
            # (DELETE, RECOMPRESS, TO DISK, TO VOLUME); a bare expression
            # is returned unchanged.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # GROUP BY ... SET <assignments>: aggregate columns of rows expiring together.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
1610
    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse a single statement.

        Tries, in order: a registered statement parser, a dialect command,
        and finally a bare expression (or SELECT) with query modifiers.
        Returns None when there is no current token.
        """
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        # Not a known statement: parse an expression, or a SELECT if no
        # expression could be parsed, then apply trailing query modifiers.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
1624
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parse a DROP statement; unknown target kinds fall back to a raw command.

        Args:
            exists: set when the caller has already consumed an IF EXISTS clause.
        """
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        # Optional ON <...> clause (e.g. DROP ... ON CLUSTER).
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        # NOTE: the _match_text_seq calls below consume tokens left-to-right,
        # so the keyword-argument order is significant.
        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )
1660
1661    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1662        return (
1663            self._match_text_seq("IF")
1664            and (not not_ or self._match(TokenType.NOT))
1665            and self._match(TokenType.EXISTS)
1666        )
1667
    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE (or REPLACE) statement into exp.Create.

        Handles UDFs/procedures, indexes, and DB-creatable objects (tables,
        views, sequences, ...), collecting properties from their various
        dialect-specific positions. Falls back to a raw command when the
        statement cannot be fully understood.
        """
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        # CLUSTERED / NONCLUSTERED COLUMNSTORE index flavor; None when absent.
        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is matched below.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulate properties parsed at different positions into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            # Optional CLONE/COPY clause, e.g. CREATE TABLE t CLONE src.
            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        # Leftover tokens (other than a closing paren / comma) mean we didn't
        # fully understand the statement - treat it as a raw command instead.
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )
1833
    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parse sequence options (INCREMENT BY, MINVALUE, CACHE, ...).

        Returns None when no tokens were consumed, so callers can safely probe.
        """
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                # Fall back to generic CREATE SEQUENCE options; stop on no match.
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # No progress means there were no sequence properties at all.
        return None if self._index == index else seq
1868
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # NOTE: dict literals evaluate in order, so these matches consume
        # their modifier keywords left-to-right before the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched; a parser
                # that doesn't accept them raises TypeError -> parse error.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None
1893
1894    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
1895        return self._parse_wrapped_csv(self._parse_property)
1896
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property, trying named parsers before the generic
        `key = value` form; falls back to sequence properties last."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property; backtrack if there is no '='.
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)
1928
1929    def _parse_stored(self) -> exp.FileFormatProperty:
1930        self._match(TokenType.ALIAS)
1931
1932        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1933        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1934
1935        return self.expression(
1936            exp.FileFormatProperty,
1937            this=(
1938                self.expression(
1939                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
1940                )
1941                if input_format or output_format
1942                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
1943            ),
1944        )
1945
1946    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
1947        field = self._parse_field()
1948        if isinstance(field, exp.Identifier) and not field.quoted:
1949            field = exp.var(field)
1950
1951        return field
1952
1953    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
1954        self._match(TokenType.EQ)
1955        self._match(TokenType.ALIAS)
1956
1957        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)
1958
1959    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
1960        properties = []
1961        while True:
1962            if before:
1963                prop = self._parse_property_before()
1964            else:
1965                prop = self._parse_property()
1966            if not prop:
1967                break
1968            for p in ensure_list(prop):
1969                properties.append(p)
1970
1971        if properties:
1972            return self.expression(exp.Properties, expressions=properties)
1973
1974        return None
1975
1976    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
1977        return self.expression(
1978            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1979        )
1980
1981    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
1982        if self._index >= 2:
1983            pre_volatile_token = self._tokens[self._index - 2]
1984        else:
1985            pre_volatile_token = None
1986
1987        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
1988            return exp.VolatileProperty()
1989
1990        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1991
1992    def _parse_retention_period(self) -> exp.Var:
1993        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
1994        number = self._parse_number()
1995        number_str = f"{number} " if number else ""
1996        unit = self._parse_var(any_token=True)
1997        return exp.var(f"{number_str}{unit}")
1998
    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL's SYSTEM_VERSIONING = {OFF | ON [(options...)]} property.

        Args:
            with_: whether the property appeared inside a WITH (...) clause.
        """
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            # Optional sub-options, e.g. HISTORY_TABLE = dbo.history.
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
2028
    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse a DATA_DELETION = {ON | OFF} [(FILTER_COLUMN = ..., RETENTION_PERIOD = ...)] property."""
        self._match(TokenType.EQ)
        # Defaults to on unless OFF is explicitly given.
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
2044
    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause following WITH in a property list.

        The alternatives below are order-sensitive: each branch consumes
        tokens only when its leading keywords match.
        """
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()
2078
2079    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
2080    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
2081        self._match(TokenType.EQ)
2082
2083        user = self._parse_id_var()
2084        self._match(TokenType.PARAMETER)
2085        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
2086
2087        if not user or not host:
2088            return None
2089
2090        return exp.DefinerProperty(this=f"{user}@{host}")
2091
2092    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
2093        self._match(TokenType.TABLE)
2094        self._match(TokenType.EQ)
2095        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
2096
2097    def _parse_log(self, no: bool = False) -> exp.LogProperty:
2098        return self.expression(exp.LogProperty, no=no)
2099
2100    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
2101        return self.expression(exp.JournalProperty, **kwargs)
2102
2103    def _parse_checksum(self) -> exp.ChecksumProperty:
2104        self._match(TokenType.EQ)
2105
2106        on = None
2107        if self._match(TokenType.ON):
2108            on = True
2109        elif self._match_text_seq("OFF"):
2110            on = False
2111
2112        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
2113
2114    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
2115        return self.expression(
2116            exp.Cluster,
2117            expressions=(
2118                self._parse_wrapped_csv(self._parse_ordered)
2119                if wrapped
2120                else self._parse_csv(self._parse_ordered)
2121            ),
2122        )
2123
2124    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
2125        self._match_text_seq("BY")
2126
2127        self._match_l_paren()
2128        expressions = self._parse_csv(self._parse_column)
2129        self._match_r_paren()
2130
2131        if self._match_text_seq("SORTED", "BY"):
2132            self._match_l_paren()
2133            sorted_by = self._parse_csv(self._parse_ordered)
2134            self._match_r_paren()
2135        else:
2136            sorted_by = None
2137
2138        self._match(TokenType.INTO)
2139        buckets = self._parse_number()
2140        self._match_text_seq("BUCKETS")
2141
2142        return self.expression(
2143            exp.ClusteredByProperty,
2144            expressions=expressions,
2145            sorted_by=sorted_by,
2146            buckets=buckets,
2147        )
2148
2149    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
2150        if not self._match_text_seq("GRANTS"):
2151            self._retreat(self._index - 1)
2152            return None
2153
2154        return self.expression(exp.CopyGrantsProperty)
2155
2156    def _parse_freespace(self) -> exp.FreespaceProperty:
2157        self._match(TokenType.EQ)
2158        return self.expression(
2159            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
2160        )
2161
2162    def _parse_mergeblockratio(
2163        self, no: bool = False, default: bool = False
2164    ) -> exp.MergeBlockRatioProperty:
2165        if self._match(TokenType.EQ):
2166            return self.expression(
2167                exp.MergeBlockRatioProperty,
2168                this=self._parse_number(),
2169                percent=self._match(TokenType.PERCENT),
2170            )
2171
2172        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
2173
2174    def _parse_datablocksize(
2175        self,
2176        default: t.Optional[bool] = None,
2177        minimum: t.Optional[bool] = None,
2178        maximum: t.Optional[bool] = None,
2179    ) -> exp.DataBlocksizeProperty:
2180        self._match(TokenType.EQ)
2181        size = self._parse_number()
2182
2183        units = None
2184        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
2185            units = self._prev.text
2186
2187        return self.expression(
2188            exp.DataBlocksizeProperty,
2189            size=size,
2190            units=units,
2191            default=default,
2192            minimum=minimum,
2193            maximum=maximum,
2194        )
2195
2196    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
2197        self._match(TokenType.EQ)
2198        always = self._match_text_seq("ALWAYS")
2199        manual = self._match_text_seq("MANUAL")
2200        never = self._match_text_seq("NEVER")
2201        default = self._match_text_seq("DEFAULT")
2202
2203        autotemp = None
2204        if self._match_text_seq("AUTOTEMP"):
2205            autotemp = self._parse_schema()
2206
2207        return self.expression(
2208            exp.BlockCompressionProperty,
2209            always=always,
2210            manual=manual,
2211            never=never,
2212            default=default,
2213            autotemp=autotemp,
2214        )
2215
2216    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
2217        index = self._index
2218        no = self._match_text_seq("NO")
2219        concurrent = self._match_text_seq("CONCURRENT")
2220
2221        if not self._match_text_seq("ISOLATED", "LOADING"):
2222            self._retreat(index)
2223            return None
2224
2225        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
2226        return self.expression(
2227            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
2228        )
2229
    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: target kind/name, FOR/IN, lock type, OVERRIDE.

        Every component is optional; unmatched parts are recorded as None.
        """
        # Target kind: TABLE, VIEW, ROW or DATABASE.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named targets carry an object name (ROW does not).
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        # Lock type keyword; EXCL is normalized to EXCLUSIVE.
        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
2279
2280    def _parse_partition_by(self) -> t.List[exp.Expression]:
2281        if self._match(TokenType.PARTITION_BY):
2282            return self._parse_csv(self._parse_assignment)
2283        return []
2284
    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a Postgres partition bound: IN (...), FROM (...) TO (...),
        or WITH (MODULUS <n>, REMAINDER <m>). Raises on anything else."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are special markers, not ordinary expressions.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            # Hash partitioning: WITH (MODULUS <n>, REMAINDER <m>).
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )
2319
2320    # https://www.postgresql.org/docs/current/sql-createtable.html
2321    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
2322        if not self._match_text_seq("OF"):
2323            self._retreat(self._index - 1)
2324            return None
2325
2326        this = self._parse_table(schema=True)
2327
2328        if self._match(TokenType.DEFAULT):
2329            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
2330        elif self._match_text_seq("FOR", "VALUES"):
2331            expression = self._parse_partition_bound_spec()
2332        else:
2333            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")
2334
2335        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
2336
2337    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
2338        self._match(TokenType.EQ)
2339        return self.expression(
2340            exp.PartitionedByProperty,
2341            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
2342        )
2343
2344    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
2345        if self._match_text_seq("AND", "STATISTICS"):
2346            statistics = True
2347        elif self._match_text_seq("AND", "NO", "STATISTICS"):
2348            statistics = False
2349        else:
2350            statistics = None
2351
2352        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
2353
2354    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
2355        if self._match_text_seq("SQL"):
2356            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
2357        return None
2358
2359    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
2360        if self._match_text_seq("SQL", "DATA"):
2361            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
2362        return None
2363
2364    def _parse_no_property(self) -> t.Optional[exp.Expression]:
2365        if self._match_text_seq("PRIMARY", "INDEX"):
2366            return exp.NoPrimaryIndexProperty()
2367        if self._match_text_seq("SQL"):
2368            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
2369        return None
2370
2371    def _parse_on_property(self) -> t.Optional[exp.Expression]:
2372        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
2373            return exp.OnCommitProperty()
2374        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
2375            return exp.OnCommitProperty(delete=True)
2376        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))
2377
2378    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
2379        if self._match_text_seq("SQL", "DATA"):
2380            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
2381        return None
2382
2383    def _parse_distkey(self) -> exp.DistKeyProperty:
2384        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
2385
2386    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
2387        table = self._parse_table(schema=True)
2388
2389        options = []
2390        while self._match_texts(("INCLUDING", "EXCLUDING")):
2391            this = self._prev.text.upper()
2392
2393            id_var = self._parse_id_var()
2394            if not id_var:
2395                return None
2396
2397            options.append(
2398                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
2399            )
2400
2401        return self.expression(exp.LikeProperty, this=table, expressions=options)
2402
2403    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
2404        return self.expression(
2405            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
2406        )
2407
2408    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
2409        self._match(TokenType.EQ)
2410        return self.expression(
2411            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
2412        )
2413
2414    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
2415        self._match_text_seq("WITH", "CONNECTION")
2416        return self.expression(
2417            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
2418        )
2419
    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: RETURNS TABLE [<...> | (...)],
        RETURNS NULL ON NULL INPUT, or RETURNS <type>.
        """
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # Angle-bracketed column list: RETURNS TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                # Parenthesized column list: RETURNS TABLE (col type, ...)
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)
2443
    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement: optional object kind, optional style
        (EXTENDED/FORMATTED/HISTORY), the target table, and trailing properties.
        """
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The matched "style" word was actually the first part of a qualified
            # table name (e.g. DESCRIBE history.t), so undo both matches.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )
2456
    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (the INSERT token was already consumed).

        Handles INSERT [OVERWRITE] [LOCAL] DIRECTORY targets, INSERT IGNORE,
        "INSERT OR <alternative>" variants, and insertion into either a table
        or a table function. The keyword arguments at the bottom are evaluated
        in order, which is also the order the clauses are consumed from the
        token stream — do not reorder them.
        """
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT ... DIRECTORY '<path>' [ROW FORMAT ...] writes to a path
            # rather than a table
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE — keep the alternative's text
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # use the RETURNING parsed earlier if present, otherwise try again here
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )
2507
2508    def _parse_kill(self) -> exp.Kill:
2509        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None
2510
2511        return self.expression(
2512            exp.Kill,
2513            this=self._parse_primary(),
2514            kind=kind,
2515        )
2516
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse an ON CONFLICT ... or ON DUPLICATE KEY ... clause.

        Returns None when neither keyword sequence is present.
        """
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a key-column list
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        # If the action's last consumed token was UPDATE, parse its SET assignments
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )
2549
2550    def _parse_returning(self) -> t.Optional[exp.Returning]:
2551        if not self._match(TokenType.RETURNING):
2552            return None
2553        return self.expression(
2554            exp.Returning,
2555            expressions=self._parse_csv(self._parse_expression),
2556            into=self._match(TokenType.INTO) and self._parse_table_part(),
2557        )
2558
2559    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
2560        if not self._match(TokenType.FORMAT):
2561            return None
2562        return self._parse_row_format()
2563
2564    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
2565        index = self._index
2566        with_ = with_ or self._match_text_seq("WITH")
2567
2568        if not self._match(TokenType.SERDE_PROPERTIES):
2569            self._retreat(index)
2570            return None
2571        return self.expression(
2572            exp.SerdeProperties,
2573            **{  # type: ignore
2574                "expressions": self._parse_wrapped_properties(),
2575                "with": with_,
2576            },
2577        )
2578
    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a ROW FORMAT SERDE '<class>' or ROW FORMAT DELIMITED ... clause.

        Args:
            match_row: when True, require a leading ROW FORMAT token pair and
                return None if it is absent.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional; they are checked in this fixed order
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
2612
    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse a LOAD DATA [LOCAL] INPATH ... statement; any other LOAD form
        falls back to an opaque exp.Command via _parse_as_command.
        """
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
2632
    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement (the DELETE token was already consumed).

        The keyword arguments below are evaluated in order, matching the order
        the clauses are consumed from the token stream.
        """
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # use the RETURNING parsed earlier if present, otherwise try again here
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
2653
    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement (the UPDATE token was already consumed)."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            # "from" is a Python keyword, hence the dict-splat form; values are
            # evaluated in order, matching token-consumption order
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )
2672
2673    def _parse_uncache(self) -> exp.Uncache:
2674        if not self._match(TokenType.TABLE):
2675            self.raise_error("Expecting TABLE after UNCACHE")
2676
2677        return self.expression(
2678            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
2679        )
2680
2681    def _parse_cache(self) -> exp.Cache:
2682        lazy = self._match_text_seq("LAZY")
2683        self._match(TokenType.TABLE)
2684        table = self._parse_table(schema=True)
2685
2686        options = []
2687        if self._match_text_seq("OPTIONS"):
2688            self._match_l_paren()
2689            k = self._parse_string()
2690            self._match(TokenType.EQ)
2691            v = self._parse_string()
2692            options = [k, v]
2693            self._match_r_paren()
2694
2695        self._match(TokenType.ALIAS)
2696        return self.expression(
2697            exp.Cache,
2698            this=table,
2699            lazy=lazy,
2700            options=options,
2701            expression=self._parse_select(nested=True),
2702        )
2703
2704    def _parse_partition(self) -> t.Optional[exp.Partition]:
2705        if not self._match(TokenType.PARTITION):
2706            return None
2707
2708        return self.expression(
2709            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
2710        )
2711
2712    def _parse_value(self) -> t.Optional[exp.Tuple]:
2713        if self._match(TokenType.L_PAREN):
2714            expressions = self._parse_csv(self._parse_expression)
2715            self._match_r_paren()
2716            return self.expression(exp.Tuple, expressions=expressions)
2717
2718        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
2719        expression = self._parse_expression()
2720        if expression:
2721            return self.expression(exp.Tuple, expressions=[expression])
2722        return None
2723
2724    def _parse_projections(self) -> t.List[exp.Expression]:
2725        return self._parse_expressions()
2726
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query.

        Args:
            nested: whether this call is in a parenthesized subquery position.
            table: whether a bare table reference is acceptable here.
            parse_subquery_alias: parse a trailing alias on a parsed subquery.
            parse_set_operation: attach trailing set operations (e.g. UNION).

        Returns:
            The parsed query expression, or None if no query starts here.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # Skip the quantifier match when the following token is a dot —
            # presumably ALL/DISTINCT is then part of a qualified name (TODO confirm)
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = False, False

            # SELECT AS STRUCT / SELECT AS VALUE style output kind
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            # The FROM clause may have been parsed already (leading FROM above)
            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            # A bare leading FROM with no SELECT becomes SELECT * FROM ...
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this
2834
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH [RECURSIVE] clause and its list of CTEs.

        Args:
            skip_with_token: treat the WITH token as already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Continue on a comma; the condition's second _match also tolerates a
            # bare WITH between CTEs, and the else-branch consumes a WITH that
            # follows a comma (both matches are side-effecting)
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
2854
    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: <alias> [AS] [[NOT] MATERIALIZED] (<statement>)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        # Tri-state materialization hint: True/False when spelled out, else None
        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )
2877
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse an optional [AS] <alias> [(col, ...)] table alias.

        Args:
            alias_tokens: token types allowed as the alias identifier; defaults
                to TABLE_ALIAS_TOKENS.

        Returns:
            A TableAlias, or None when neither an alias nor a column list is found.
        """
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            # Only commit to the paren if a non-empty column list follows;
            # otherwise restore the token position
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias
2904
2905    def _parse_subquery(
2906        self, this: t.Optional[exp.Expression], parse_alias: bool = True
2907    ) -> t.Optional[exp.Subquery]:
2908        if not this:
2909            return None
2910
2911        return self.expression(
2912            exp.Subquery,
2913            this=this,
2914            pivots=self._parse_pivots(),
2915            alias=self._parse_table_alias() if parse_alias else None,
2916        )
2917
    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-join arms that reference an earlier table (e.g.
        `FROM t, t.arr`) into explicit UNNEST(...) calls.

        Mutates `this` in place and returns it.
        """
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        # Names that later join arms may legally reference as a column prefix
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            # Only condition-less joins whose first name part refers back to an
            # earlier table are treated as implicit unnests
            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this
2944
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, and the clauses
        registered in QUERY_MODIFIER_PARSERS) onto `this`, then apply implicit
        unnest rewriting when the dialect supports it.
        """
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            # iter(callable, None) keeps parsing laterals until one returns None
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # Hoist an OFFSET embedded in the LIMIT node into a
                            # standalone Offset on the query, moving the LIMIT's
                            # BY expressions along with it
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this
2978
2979    def _parse_hint(self) -> t.Optional[exp.Hint]:
2980        if self._match(TokenType.HINT):
2981            hints = []
2982            for hint in iter(
2983                lambda: self._parse_csv(
2984                    lambda: self._parse_function() or self._parse_var(upper=True)
2985                ),
2986                [],
2987            ):
2988                hints.extend(hint)
2989
2990            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2991                self.raise_error("Expected */ after HINT")
2992
2993            return self.expression(exp.Hint, expressions=hints)
2994
2995        return None
2996
2997    def _parse_into(self) -> t.Optional[exp.Into]:
2998        if not self._match(TokenType.INTO):
2999            return None
3000
3001        temp = self._match(TokenType.TEMPORARY)
3002        unlogged = self._match_text_seq("UNLOGGED")
3003        self._match(TokenType.TABLE)
3004
3005        return self.expression(
3006            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
3007        )
3008
3009    def _parse_from(
3010        self, joins: bool = False, skip_from_token: bool = False
3011    ) -> t.Optional[exp.From]:
3012        if not skip_from_token and not self._match(TokenType.FROM):
3013            return None
3014
3015        return self.expression(
3016            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
3017        )
3018
3019    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
3020        return self.expression(
3021            exp.MatchRecognizeMeasure,
3022            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
3023            this=self._parse_expression(),
3024        )
3025
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause, or return None if absent."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # The ROWS PER MATCH variant is captured verbatim as a Var
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        # AFTER MATCH SKIP ... is likewise captured as a Var string
        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan raw tokens to the matching close paren, tracking nesting depth;
            # the pattern's source text is preserved verbatim in a Var
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                # `end` trails one token behind, so it lands on the last token
                # before the closing paren
                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
3113
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse a LATERAL, CROSS APPLY, or OUTER APPLY clause.

        Returns None when none of those introducers is present. `cross_apply`
        is truthy for CROSS APPLY, False for OUTER APPLY, and None for LATERAL.
        """
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to UNNEST, a function call, or a plain
            # (possibly dotted) identifier
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            # LATERAL VIEW form: <table> [AS col, ...]
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )
3163
3164    def _parse_join_parts(
3165        self,
3166    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
3167        return (
3168            self._match_set(self.JOIN_METHODS) and self._prev,
3169            self._match_set(self.JOIN_SIDES) and self._prev,
3170            self._match_set(self.JOIN_KINDS) and self._prev,
3171        )
3172
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single JOIN clause (including comma joins and APPLY forms).

        Args:
            skip_join_token: assume the JOIN keyword was already consumed.
            parse_bracket: allow a bracketed expression as the join target.

        Returns None when no join starts at the current position.
        """
        # A comma in a FROM list is an implicit join
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        # STRAIGHT_JOIN acts as the join keyword itself, so no JOIN token follows
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            # Not actually a join: rewind and discard the parts we consumed
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            # The join target may itself be followed by a chain of joins whose
            # ON/USING comes at the end; if no condition follows, rewind.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        # Bubble comments from the method/side/kind tokens onto the Join node
        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
3232
3233    def _parse_opclass(self) -> t.Optional[exp.Expression]:
3234        this = self._parse_assignment()
3235
3236        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
3237            return this
3238
3239        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
3240            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())
3241
3242        return this
3243
3244    def _parse_index_params(self) -> exp.IndexParameters:
3245        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None
3246
3247        if self._match(TokenType.L_PAREN, advance=False):
3248            columns = self._parse_wrapped_csv(self._parse_with_operator)
3249        else:
3250            columns = None
3251
3252        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
3253        partition_by = self._parse_partition_by()
3254        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
3255        tablespace = (
3256            self._parse_var(any_token=True)
3257            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
3258            else None
3259        )
3260        where = self._parse_where()
3261
3262        on = self._parse_field() if self._match(TokenType.ON) else None
3263
3264        return self.expression(
3265            exp.IndexParameters,
3266            using=using,
3267            columns=columns,
3268            include=include,
3269            partition_by=partition_by,
3270            where=where,
3271            with_storage=with_storage,
3272            tablespace=tablespace,
3273            on=on,
3274        )
3275
3276    def _parse_index(
3277        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
3278    ) -> t.Optional[exp.Index]:
3279        if index or anonymous:
3280            unique = None
3281            primary = None
3282            amp = None
3283
3284            self._match(TokenType.ON)
3285            self._match(TokenType.TABLE)  # hive
3286            table = self._parse_table_parts(schema=True)
3287        else:
3288            unique = self._match(TokenType.UNIQUE)
3289            primary = self._match_text_seq("PRIMARY")
3290            amp = self._match_text_seq("AMP")
3291
3292            if not self._match(TokenType.INDEX):
3293                return None
3294
3295            index = self._parse_id_var()
3296            table = None
3297
3298        params = self._parse_index_params()
3299
3300        return self.expression(
3301            exp.Index,
3302            this=index,
3303            table=table,
3304            unique=unique,
3305            primary=primary,
3306            amp=amp,
3307            params=params,
3308        )
3309
3310    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
3311        hints: t.List[exp.Expression] = []
3312        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
3313            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
3314            hints.append(
3315                self.expression(
3316                    exp.WithTableHint,
3317                    expressions=self._parse_csv(
3318                        lambda: self._parse_function() or self._parse_var(any_token=True)
3319                    ),
3320                )
3321            )
3322            self._match_r_paren()
3323        else:
3324            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
3325            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
3326                hint = exp.IndexTableHint(this=self._prev.text.upper())
3327
3328                self._match_set((TokenType.INDEX, TokenType.KEY))
3329                if self._match(TokenType.FOR):
3330                    hint.set("target", self._advance_any() and self._prev.text.upper())
3331
3332                hint.set("expressions", self._parse_wrapped_id_vars())
3333                hints.append(hint)
3334
3335        return hints or None
3336
3337    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
3338        return (
3339            (not schema and self._parse_function(optional_parens=False))
3340            or self._parse_id_var(any_token=False)
3341            or self._parse_string_as_identifier()
3342            or self._parse_placeholder()
3343        )
3344
    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly dotted) table name into an exp.Table.

        Args:
            schema: forwarded to _parse_table_part (disables function parsing).
            is_db_reference: the name refers to a database, so the parsed parts
                shift one level up (table -> db, db -> catalog).
            wildcard: allow a trailing `*` fused onto the last identifier.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            # Fold the adjacent `*` into the identifier text (e.g. `db.tbl*`)
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
3409
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like expression in a FROM/JOIN position.

        Tries, in order: LATERAL/APPLY, UNNEST, derived VALUES, a subquery, and
        finally a (possibly bracketed) table name — then attaches trailing
        clauses (partition, version, sample, alias, hints, pivots, ordinality,
        and optionally joins).

        Args:
            schema: parse as a schema object (delegates to _parse_schema).
            joins: also consume any joins that follow the table.
            alias_tokens: token types allowed to start an alias.
            parse_bracket: allow a bracketed expression as the table itself.
            is_db_reference: the parsed name refers to a database, not a table.
            parse_partition: allow a PARTITION clause after the table name.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Dialects differ on whether TABLESAMPLE precedes or follows the alias;
        # exactly one of these two branches runs, so table_sample is always bound.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table it applies to
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
3507
3508    def _parse_version(self) -> t.Optional[exp.Version]:
3509        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
3510            this = "TIMESTAMP"
3511        elif self._match(TokenType.VERSION_SNAPSHOT):
3512            this = "VERSION"
3513        else:
3514            return None
3515
3516        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
3517            kind = self._prev.text.upper()
3518            start = self._parse_bitwise()
3519            self._match_texts(("TO", "AND"))
3520            end = self._parse_bitwise()
3521            expression: t.Optional[exp.Expression] = self.expression(
3522                exp.Tuple, expressions=[start, end]
3523            )
3524        elif self._match_text_seq("CONTAINED", "IN"):
3525            kind = "CONTAINED IN"
3526            expression = self.expression(
3527                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
3528            )
3529        elif self._match(TokenType.ALL):
3530            kind = "ALL"
3531            expression = None
3532        else:
3533            self._match_text_seq("AS", "OF")
3534            kind = "AS OF"
3535            expression = self._parse_type()
3536
3537        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
3538
3539    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
3540        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
3541        index = self._index
3542        historical_data = None
3543        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
3544            this = self._prev.text.upper()
3545            kind = (
3546                self._match(TokenType.L_PAREN)
3547                and self._match_texts(self.HISTORICAL_DATA_KIND)
3548                and self._prev.text.upper()
3549            )
3550            expression = self._match(TokenType.FARROW) and self._parse_bitwise()
3551
3552            if expression:
3553                self._match_r_paren()
3554                historical_data = self.expression(
3555                    exp.HistoricalData, this=this, kind=kind, expression=expression
3556                )
3557            else:
3558                self._retreat(index)
3559
3560        return historical_data
3561
3562    def _parse_changes(self) -> t.Optional[exp.Changes]:
3563        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
3564            return None
3565
3566        information = self._parse_var(any_token=True)
3567        self._match_r_paren()
3568
3569        return self.expression(
3570            exp.Changes,
3571            information=information,
3572            at_before=self._parse_historical_data(),
3573            end=self._parse_historical_data(),
3574        )
3575
    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) table expression, if present.

        Args:
            with_alias: also parse a trailing table alias.
        """
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # Dialects where the alias names the produced column, not the table:
                # move the table alias into the column slot.
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # WITH ORDINALITY: the surplus column alias names the offset column
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            # Default name "offset" when no explicit alias is given
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
3604
3605    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
3606        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
3607        if not is_derived and not self._match_text_seq("VALUES"):
3608            return None
3609
3610        expressions = self._parse_csv(self._parse_value)
3611        alias = self._parse_table_alias()
3612
3613        if is_derived:
3614            self._match_r_paren()
3615
3616        return self.expression(
3617            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
3618        )
3619
3620    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
3621        if not self._match(TokenType.TABLE_SAMPLE) and not (
3622            as_modifier and self._match_text_seq("USING", "SAMPLE")
3623        ):
3624            return None
3625
3626        bucket_numerator = None
3627        bucket_denominator = None
3628        bucket_field = None
3629        percent = None
3630        size = None
3631        seed = None
3632
3633        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
3634        matched_l_paren = self._match(TokenType.L_PAREN)
3635
3636        if self.TABLESAMPLE_CSV:
3637            num = None
3638            expressions = self._parse_csv(self._parse_primary)
3639        else:
3640            expressions = None
3641            num = (
3642                self._parse_factor()
3643                if self._match(TokenType.NUMBER, advance=False)
3644                else self._parse_primary() or self._parse_placeholder()
3645            )
3646
3647        if self._match_text_seq("BUCKET"):
3648            bucket_numerator = self._parse_number()
3649            self._match_text_seq("OUT", "OF")
3650            bucket_denominator = bucket_denominator = self._parse_number()
3651            self._match(TokenType.ON)
3652            bucket_field = self._parse_field()
3653        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
3654            percent = num
3655        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
3656            size = num
3657        else:
3658            percent = num
3659
3660        if matched_l_paren:
3661            self._match_r_paren()
3662
3663        if self._match(TokenType.L_PAREN):
3664            method = self._parse_var(upper=True)
3665            seed = self._match(TokenType.COMMA) and self._parse_number()
3666            self._match_r_paren()
3667        elif self._match_texts(("SEED", "REPEATABLE")):
3668            seed = self._parse_wrapped(self._parse_number)
3669
3670        if not method and self.DEFAULT_SAMPLING_METHOD:
3671            method = exp.var(self.DEFAULT_SAMPLING_METHOD)
3672
3673        return self.expression(
3674            exp.TableSample,
3675            expressions=expressions,
3676            method=method,
3677            bucket_numerator=bucket_numerator,
3678            bucket_denominator=bucket_denominator,
3679            bucket_field=bucket_field,
3680            percent=percent,
3681            size=size,
3682            seed=seed,
3683        )
3684
3685    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
3686        return list(iter(self._parse_pivot, None)) or None
3687
3688    def _parse_joins(self) -> t.Iterator[exp.Join]:
3689        return iter(self._parse_join, None)
3690
3691    # https://duckdb.org/docs/sql/statements/pivot
3692    def _parse_simplified_pivot(self) -> exp.Pivot:
3693        def _parse_on() -> t.Optional[exp.Expression]:
3694            this = self._parse_bitwise()
3695            return self._parse_in(this) if self._match(TokenType.IN) else this
3696
3697        this = self._parse_table()
3698        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
3699        using = self._match(TokenType.USING) and self._parse_csv(
3700            lambda: self._parse_alias(self._parse_function())
3701        )
3702        group = self._parse_group()
3703        return self.expression(
3704            exp.Pivot, this=this, expressions=expressions, using=using, group=group
3705        )
3706
3707    def _parse_pivot_in(self) -> exp.In:
3708        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
3709            this = self._parse_assignment()
3710
3711            self._match(TokenType.ALIAS)
3712            alias = self._parse_field()
3713            if alias:
3714                return self.expression(exp.PivotAlias, this=this, alias=alias)
3715
3716            return this
3717
3718        value = self._parse_column()
3719
3720        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
3721            self.raise_error("Expecting IN (")
3722
3723        aliased_expressions = self._parse_csv(_parse_aliased_expression)
3724
3725        self._match_r_paren()
3726        return self.expression(exp.In, this=value, expressions=aliased_expressions)
3727
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT or UNPIVOT clause, if present.

        Returns None (after rewinding) when the tokens don't form a full clause.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # The PIVOT/UNPIVOT token was something else (e.g. an alias) — rewind
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute output column names: one per (pivot value, aggregation name)
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
3794
3795    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
3796        return [agg.alias for agg in aggregations]
3797
3798    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
3799        if not skip_where_token and not self._match(TokenType.PREWHERE):
3800            return None
3801
3802        return self.expression(
3803            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
3804        )
3805
3806    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
3807        if not skip_where_token and not self._match(TokenType.WHERE):
3808            return None
3809
3810        return self.expression(
3811            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
3812        )
3813
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP, CUBE and
        TOTALS modifiers.

        Args:
            skip_group_by_token: assume GROUP BY was already consumed.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(
                lambda: None
                if self._match(TokenType.ROLLUP, advance=False)
                else self._parse_assignment()
            )
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            # WITH ROLLUP / WITH CUBE take no column list; bare ROLLUP/CUBE do
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # The WITH belonged to a later clause — give it back
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore
3862
3863    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
3864        if not self._match(TokenType.GROUPING_SETS):
3865            return None
3866
3867        return self._parse_wrapped_csv(self._parse_grouping_set)
3868
3869    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
3870        if self._match(TokenType.L_PAREN):
3871            grouping_set = self._parse_csv(self._parse_column)
3872            self._match_r_paren()
3873            return self.expression(exp.Tuple, expressions=grouping_set)
3874
3875        return self._parse_column()
3876
3877    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
3878        if not skip_having_token and not self._match(TokenType.HAVING):
3879            return None
3880        return self.expression(exp.Having, this=self._parse_assignment())
3881
3882    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
3883        if not self._match(TokenType.QUALIFY):
3884            return None
3885        return self.expression(exp.Qualify, this=self._parse_assignment())
3886
3887    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
3888        if skip_start_token:
3889            start = None
3890        elif self._match(TokenType.START_WITH):
3891            start = self._parse_assignment()
3892        else:
3893            return None
3894
3895        self.