sqlglot.parser

from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))

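# Editorial note (not part of the original module): VAR_MAP pairs alternating
# arguments into keys and values, so VAR_MAP('a', 1, 'b', 2) becomes
# exp.VarMap(keys=array('a', 'b'), values=array(1, 2)), while VAR_MAP(*) yields
# an exp.StarMap.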

def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like

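# Editorial note: the swapped indices are intentional. The functional form
# follows SQLite's like(X, Y), which is equivalent to Y LIKE X, so args[0] is
# the pattern and args[1] the value being matched; an optional third argument
# supplies the ESCAPE character.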

def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)

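# Illustrative examples (editorial): for a dialect with LOG_BASE_FIRST,
# LOG(10, x) builds exp.Log(this=10, expression=x); otherwise the operands are
# swapped. Single-argument LOG(x) builds exp.Ln when LOG_DEFAULTS_TO_LN is set
# and exp.Log otherwise.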

def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder

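# Illustrative sketch (editorial): JSON_EXTRACT(doc, '$.a') becomes
# exp.JSONExtract(this=doc, expression=<dialect-normalized JSON path>); extra
# path arguments are preserved only for exp.JSONExtract, which accepts several.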

class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass

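# Editorial note: this metaclass precomputes tries over the space-separated
# keys of SHOW_PARSERS and SET_PARSERS, so multi-word commands (e.g. a
# hypothetical "SHOW GLOBAL STATUS") can be matched one token at a time with
# in_trie rather than by re-joining tokens for every candidate key.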

class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

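    # Typical usage (illustrative sketch; most callers go through sqlglot.parse
    # or a Dialect rather than instantiating Parser directly):
    #
    #     from sqlglot import Parser, Tokenizer
    #     sql = "SELECT a FROM b"
    #     expressions = Parser().parse(Tokenizer().tokenize(sql), sql)
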
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False
    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

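    # Example (illustrative): parse() returns one syntax tree per statement, so
    # tokens for "SELECT 1; SELECT 2" produce a list of two expression trees.
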
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

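    # Example (illustrative): parse_into(exp.From, tokens, sql) dispatches to
    # the registered exp.From parser; a list of types is tried in order, and a
    # merged ParseError is raised if none of them succeeds.
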
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

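        # Editorial comment: split the token stream into one chunk per
        # statement on semicolons; each chunk is then parsed independently
        # with parse_method in the loop below.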
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

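    # Example (illustrative): self.expression(exp.Not, this=x) attaches any
    # comments buffered from the previous token via _add_comments below, then
    # validates the new node's required arguments.
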
    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

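    # Example (illustrative): validating exp.Cast(this=x) without its required
    # 'to' argument records or raises a ParseError, depending on error_level;
    # with ErrorLevel.IGNORE the invalid expression is returned as-is.
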
    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

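    # Editorial comment: _advance moves the cursor over self._tokens and keeps
    # _curr/_next/_prev (plus pending comments) in sync; _retreat below is just
    # a relative _advance back to a saved index.
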
    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can differ depending on the user-set ErrorLevel, so _try_parse aims to solve this
        by setting & resetting the parser state accordingly.
        """
1373        index = self._index
1374        error_level = self.error_level
1375
1376        self.error_level = ErrorLevel.IMMEDIATE
1377        try:
1378            this = parse_method()
1379        except ParseError:
1380            this = None
1381        finally:
1382            if not this or retreat:
1383                self._retreat(index)
1384            self.error_level = error_level
1385
1386        return this
1387
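    # Hypothetical call site (illustrative only): the same speculative parse as
    # above, expressed through _try_parse so the error level is handled uniformly:
    #
    #     node = self._try_parse(self._parse_types)  # None on failure, state restored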
1388    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1389        start = self._prev
1390        exists = self._parse_exists() if allow_exists else None
1391
1392        self._match(TokenType.ON)
1393
1394        materialized = self._match_text_seq("MATERIALIZED")
1395        kind = self._match_set(self.CREATABLES) and self._prev
1396        if not kind:
1397            return self._parse_as_command(start)
1398
1399        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1400            this = self._parse_user_defined_function(kind=kind.token_type)
1401        elif kind.token_type == TokenType.TABLE:
1402            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1403        elif kind.token_type == TokenType.COLUMN:
1404            this = self._parse_column()
1405        else:
1406            this = self._parse_id_var()
1407
1408        self._match(TokenType.IS)
1409
1410        return self.expression(
1411            exp.Comment,
1412            this=this,
1413            kind=kind.text,
1414            expression=self._parse_string(),
1415            exists=exists,
1416            materialized=materialized,
1417        )
1418
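    # Illustrative example (not part of the original module), assuming the public
    # sqlglot.parse_one API: a standard COMMENT statement lands in this method
    # and is expected to produce an exp.Comment node, e.g.:
    #
    #     import sqlglot
    #     sqlglot.parse_one("COMMENT ON TABLE db.users IS 'registered users'")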
1419    def _parse_to_table(
1420        self,
1421    ) -> exp.ToTableProperty:
1422        table = self._parse_table_parts(schema=True)
1423        return self.expression(exp.ToTableProperty, this=table)
1424
1425    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1426    def _parse_ttl(self) -> exp.Expression:
1427        def _parse_ttl_action() -> t.Optional[exp.Expression]:
1428            this = self._parse_bitwise()
1429
1430            if self._match_text_seq("DELETE"):
1431                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1432            if self._match_text_seq("RECOMPRESS"):
1433                return self.expression(
1434                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1435                )
1436            if self._match_text_seq("TO", "DISK"):
1437                return self.expression(
1438                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1439                )
1440            if self._match_text_seq("TO", "VOLUME"):
1441                return self.expression(
1442                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1443                )
1444
1445            return this
1446
1447        expressions = self._parse_csv(_parse_ttl_action)
1448        where = self._parse_where()
1449        group = self._parse_group()
1450
1451        aggregates = None
1452        if group and self._match(TokenType.SET):
1453            aggregates = self._parse_csv(self._parse_set_item)
1454
1455        return self.expression(
1456            exp.MergeTreeTTL,
1457            expressions=expressions,
1458            where=where,
1459            group=group,
1460            aggregates=aggregates,
1461        )
1462
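    # Illustrative example (not part of the original module): a ClickHouse
    # MergeTree TTL clause along the lines of the docs linked above, e.g.:
    #
    #     import sqlglot
    #     sqlglot.parse_one(
    #         "CREATE TABLE t (d DateTime) ENGINE=MergeTree ORDER BY d "
    #         "TTL d + INTERVAL 1 DAY DELETE",
    #         read="clickhouse",
    #     )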
1463    def _parse_statement(self) -> t.Optional[exp.Expression]:
1464        if self._curr is None:
1465            return None
1466
1467        if self._match_set(self.STATEMENT_PARSERS):
1468            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1469
1470        if self._match_set(Tokenizer.COMMANDS):
1471            return self._parse_command()
1472
1473        expression = self._parse_expression()
1474        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1475        return self._parse_query_modifiers(expression)
1476
1477    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
1478        start = self._prev
1479        temporary = self._match(TokenType.TEMPORARY)
1480        materialized = self._match_text_seq("MATERIALIZED")
1481
1482        kind = self._match_set(self.CREATABLES) and self._prev.text
1483        if not kind:
1484            return self._parse_as_command(start)
1485
1486        if_exists = exists or self._parse_exists()
1487        table = self._parse_table_parts(
1488            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
1489        )
1490
1491        if self._match(TokenType.L_PAREN, advance=False):
1492            expressions = self._parse_wrapped_csv(self._parse_types)
1493        else:
1494            expressions = None
1495
1496        return self.expression(
1497            exp.Drop,
1498            comments=start.comments,
1499            exists=if_exists,
1500            this=table,
1501            expressions=expressions,
1502            kind=kind,
1503            temporary=temporary,
1504            materialized=materialized,
1505            cascade=self._match_text_seq("CASCADE"),
1506            constraints=self._match_text_seq("CONSTRAINTS"),
1507            purge=self._match_text_seq("PURGE"),
1508        )
1509
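    # Illustrative example (not part of the original module): the optional
    # trailing modifiers are captured as flags on the exp.Drop node, e.g.:
    #
    #     import sqlglot
    #     drop = sqlglot.parse_one("DROP TABLE IF EXISTS t CASCADE")
    #     # drop.args["exists"] and drop.args["cascade"] are expected to be truthy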
1510    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1511        return (
1512            self._match_text_seq("IF")
1513            and (not not_ or self._match(TokenType.NOT))
1514            and self._match(TokenType.EXISTS)
1515        )
1516
1517    def _parse_create(self) -> exp.Create | exp.Command:
1518        # Note: this can't be None because we've matched a statement parser
1519        start = self._prev
1520        comments = self._prev_comments
1521
1522        replace = (
1523            start.token_type == TokenType.REPLACE
1524            or self._match_pair(TokenType.OR, TokenType.REPLACE)
1525            or self._match_pair(TokenType.OR, TokenType.ALTER)
1526        )
1527
1528        unique = self._match(TokenType.UNIQUE)
1529
1530        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1531            self._advance()
1532
1533        properties = None
1534        create_token = self._match_set(self.CREATABLES) and self._prev
1535
1536        if not create_token:
1537            # exp.Properties.Location.POST_CREATE
1538            properties = self._parse_properties()
1539            create_token = self._match_set(self.CREATABLES) and self._prev
1540
1541            if not properties or not create_token:
1542                return self._parse_as_command(start)
1543
1544        exists = self._parse_exists(not_=True)
1545        this = None
1546        expression: t.Optional[exp.Expression] = None
1547        indexes = None
1548        no_schema_binding = None
1549        begin = None
1550        end = None
1551        clone = None
1552
1553        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
1554            nonlocal properties
1555            if properties and temp_props:
1556                properties.expressions.extend(temp_props.expressions)
1557            elif temp_props:
1558                properties = temp_props
1559
1560        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1561            this = self._parse_user_defined_function(kind=create_token.token_type)
1562
1563            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
1564            extend_props(self._parse_properties())
1565
1566            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
1567
1568            if not expression:
1569                if self._match(TokenType.COMMAND):
1570                    expression = self._parse_as_command(self._prev)
1571                else:
1572                    begin = self._match(TokenType.BEGIN)
1573                    return_ = self._match_text_seq("RETURN")
1574
1575                    if self._match(TokenType.STRING, advance=False):
1576                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
1577                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
1578                        expression = self._parse_string()
1579                        extend_props(self._parse_properties())
1580                    else:
1581                        expression = self._parse_statement()
1582
1583                    end = self._match_text_seq("END")
1584
1585                    if return_:
1586                        expression = self.expression(exp.Return, this=expression)
1587        elif create_token.token_type == TokenType.INDEX:
1588            this = self._parse_index(index=self._parse_id_var())
1589        elif create_token.token_type in self.DB_CREATABLES:
1590            table_parts = self._parse_table_parts(
1591                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
1592            )
1593
1594            # exp.Properties.Location.POST_NAME
1595            self._match(TokenType.COMMA)
1596            extend_props(self._parse_properties(before=True))
1597
1598            this = self._parse_schema(this=table_parts)
1599
1600            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1601            extend_props(self._parse_properties())
1602
1603            self._match(TokenType.ALIAS)
1604            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
1605                # exp.Properties.Location.POST_ALIAS
1606                extend_props(self._parse_properties())
1607
1608            if create_token.token_type == TokenType.SEQUENCE:
1609                expression = self._parse_types()
1610                extend_props(self._parse_properties())
1611            else:
1612                expression = self._parse_ddl_select()
1613
1614            if create_token.token_type == TokenType.TABLE:
1615                # exp.Properties.Location.POST_EXPRESSION
1616                extend_props(self._parse_properties())
1617
1618                indexes = []
1619                while True:
1620                    index = self._parse_index()
1621
1622                    # exp.Properties.Location.POST_INDEX
1623                    extend_props(self._parse_properties())
1624
1625                    if not index:
1626                        break
1627                    else:
1628                        self._match(TokenType.COMMA)
1629                        indexes.append(index)
1630            elif create_token.token_type == TokenType.VIEW:
1631                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1632                    no_schema_binding = True
1633
1634            shallow = self._match_text_seq("SHALLOW")
1635
1636            if self._match_texts(self.CLONE_KEYWORDS):
1637                copy = self._prev.text.lower() == "copy"
1638                clone = self.expression(
1639                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
1640                )
1641
1642        if self._curr:
1643            return self._parse_as_command(start)
1644
1645        return self.expression(
1646            exp.Create,
1647            comments=comments,
1648            this=this,
1649            kind=create_token.text.upper(),
1650            replace=replace,
1651            unique=unique,
1652            expression=expression,
1653            exists=exists,
1654            properties=properties,
1655            indexes=indexes,
1656            no_schema_binding=no_schema_binding,
1657            begin=begin,
1658            end=end,
1659            clone=clone,
1660        )
1661
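    # Illustrative example (not part of the original module): REPLACE/UNIQUE and
    # the creatable kind are captured on the exp.Create node, e.g.:
    #
    #     import sqlglot
    #     create = sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1")
    #     # create.args["replace"] is expected to be truthy, with kind "VIEW"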
1662    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
1663        seq = exp.SequenceProperties()
1664
1665        options = []
1666        index = self._index
1667
1668        while self._curr:
1669            if self._match_text_seq("INCREMENT"):
1670                self._match_text_seq("BY")
1671                self._match_text_seq("=")
1672                seq.set("increment", self._parse_term())
1673            elif self._match_text_seq("MINVALUE"):
1674                seq.set("minvalue", self._parse_term())
1675            elif self._match_text_seq("MAXVALUE"):
1676                seq.set("maxvalue", self._parse_term())
1677            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
1678                self._match_text_seq("=")
1679                seq.set("start", self._parse_term())
1680            elif self._match_text_seq("CACHE"):
1681                # T-SQL allows an empty CACHE, which is initialized dynamically
1682                seq.set("cache", self._parse_number() or True)
1683            elif self._match_text_seq("OWNED", "BY"):
1684                # "OWNED BY NONE" is the default
1685                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
1686            else:
1687                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
1688                if opt:
1689                    options.append(opt)
1690                else:
1691                    break
1692
1693        seq.set("options", options if options else None)
1694        return None if self._index == index else seq
1695
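    # Illustrative example (not part of the original module): common sequence
    # options are collected into an exp.SequenceProperties node, e.g.:
    #
    #     import sqlglot
    #     sqlglot.parse_one(
    #         "CREATE SEQUENCE seq START WITH 1 INCREMENT BY 2 MINVALUE 0 CACHE 10"
    #     )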
1696    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1697        # Currently only used by Teradata
1698        self._match(TokenType.COMMA)
1699
1700        kwargs = {
1701            "no": self._match_text_seq("NO"),
1702            "dual": self._match_text_seq("DUAL"),
1703            "before": self._match_text_seq("BEFORE"),
1704            "default": self._match_text_seq("DEFAULT"),
1705            "local": (self._match_text_seq("LOCAL") and "LOCAL")
1706            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
1707            "after": self._match_text_seq("AFTER"),
1708            "minimum": self._match_texts(("MIN", "MINIMUM")),
1709            "maximum": self._match_texts(("MAX", "MAXIMUM")),
1710        }
1711
1712        if self._match_texts(self.PROPERTY_PARSERS):
1713            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
1714            try:
1715                return parser(self, **{k: v for k, v in kwargs.items() if v})
1716            except TypeError:
1717                self.raise_error(f"Cannot parse property '{self._prev.text}'")
1718
1719        return None
1720
1721    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
1722        return self._parse_wrapped_csv(self._parse_property)
1723
1724    def _parse_property(self) -> t.Optional[exp.Expression]:
1725        if self._match_texts(self.PROPERTY_PARSERS):
1726            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1727
1728        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
1729            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)
1730
1731        if self._match_text_seq("COMPOUND", "SORTKEY"):
1732            return self._parse_sortkey(compound=True)
1733
1734        if self._match_text_seq("SQL", "SECURITY"):
1735            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1736
1737        index = self._index
1738        key = self._parse_column()
1739
1740        if not self._match(TokenType.EQ):
1741            self._retreat(index)
1742            return self._parse_sequence_properties()
1743
1744        return self.expression(
1745            exp.Property,
1746            this=key.to_dot() if isinstance(key, exp.Column) else key,
1747            value=self._parse_bitwise() or self._parse_var(any_token=True),
1748        )
1749
1750    def _parse_stored(self) -> exp.FileFormatProperty:
1751        self._match(TokenType.ALIAS)
1752
1753        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1754        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1755
1756        return self.expression(
1757            exp.FileFormatProperty,
1758            this=(
1759                self.expression(
1760                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
1761                )
1762                if input_format or output_format
1763                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
1764            ),
1765        )
1766
1767    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
1768        self._match(TokenType.EQ)
1769        self._match(TokenType.ALIAS)
1770        field = self._parse_field()
1771        if isinstance(field, exp.Identifier) and not field.quoted:
1772            field = exp.var(field)
1773
1774        return self.expression(exp_class, this=field, **kwargs)
1775
1776    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
1777        properties = []
1778        while True:
1779            if before:
1780                prop = self._parse_property_before()
1781            else:
1782                prop = self._parse_property()
1783            if not prop:
1784                break
1785            for p in ensure_list(prop):
1786                properties.append(p)
1787
1788        if properties:
1789            return self.expression(exp.Properties, expressions=properties)
1790
1791        return None
1792
1793    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
1794        return self.expression(
1795            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1796        )
1797
1798    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
1799        if self._index >= 2:
1800            pre_volatile_token = self._tokens[self._index - 2]
1801        else:
1802            pre_volatile_token = None
1803
1804        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
1805            return exp.VolatileProperty()
1806
1807        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1808
1809    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
1810        self._match_pair(TokenType.EQ, TokenType.ON)
1811
1812        prop = self.expression(exp.WithSystemVersioningProperty)
1813        if self._match(TokenType.L_PAREN):
1814            self._match_text_seq("HISTORY_TABLE", "=")
1815            prop.set("this", self._parse_table_parts())
1816
1817            if self._match(TokenType.COMMA):
1818                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
1819                prop.set("expression", self._advance_any() and self._prev.text.upper())
1820
1821            self._match_r_paren()
1822
1823        return prop
1824
1825    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
1826        if self._match(TokenType.L_PAREN, advance=False):
1827            return self._parse_wrapped_properties()
1828
1829        if self._match_text_seq("JOURNAL"):
1830            return self._parse_withjournaltable()
1831
1832        if self._match_texts(self.VIEW_ATTRIBUTES):
1833            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())
1834
1835        if self._match_text_seq("DATA"):
1836            return self._parse_withdata(no=False)
1837        elif self._match_text_seq("NO", "DATA"):
1838            return self._parse_withdata(no=True)
1839
1840        if not self._next:
1841            return None
1842
1843        return self._parse_withisolatedloading()
1844
1845    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1846    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
1847        self._match(TokenType.EQ)
1848
1849        user = self._parse_id_var()
1850        self._match(TokenType.PARAMETER)
1851        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1852
1853        if not user or not host:
1854            return None
1855
1856        return exp.DefinerProperty(this=f"{user}@{host}")
1857
1858    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
1859        self._match(TokenType.TABLE)
1860        self._match(TokenType.EQ)
1861        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1862
1863    def _parse_log(self, no: bool = False) -> exp.LogProperty:
1864        return self.expression(exp.LogProperty, no=no)
1865
1866    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
1867        return self.expression(exp.JournalProperty, **kwargs)
1868
1869    def _parse_checksum(self) -> exp.ChecksumProperty:
1870        self._match(TokenType.EQ)
1871
1872        on = None
1873        if self._match(TokenType.ON):
1874            on = True
1875        elif self._match_text_seq("OFF"):
1876            on = False
1877
1878        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
1879
1880    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
1881        return self.expression(
1882            exp.Cluster,
1883            expressions=(
1884                self._parse_wrapped_csv(self._parse_ordered)
1885                if wrapped
1886                else self._parse_csv(self._parse_ordered)
1887            ),
1888        )
1889
1890    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
1891        self._match_text_seq("BY")
1892
1893        self._match_l_paren()
1894        expressions = self._parse_csv(self._parse_column)
1895        self._match_r_paren()
1896
1897        if self._match_text_seq("SORTED", "BY"):
1898            self._match_l_paren()
1899            sorted_by = self._parse_csv(self._parse_ordered)
1900            self._match_r_paren()
1901        else:
1902            sorted_by = None
1903
1904        self._match(TokenType.INTO)
1905        buckets = self._parse_number()
1906        self._match_text_seq("BUCKETS")
1907
1908        return self.expression(
1909            exp.ClusteredByProperty,
1910            expressions=expressions,
1911            sorted_by=sorted_by,
1912            buckets=buckets,
1913        )
1914
1915    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
1916        if not self._match_text_seq("GRANTS"):
1917            self._retreat(self._index - 1)
1918            return None
1919
1920        return self.expression(exp.CopyGrantsProperty)
1921
1922    def _parse_freespace(self) -> exp.FreespaceProperty:
1923        self._match(TokenType.EQ)
1924        return self.expression(
1925            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1926        )
1927
1928    def _parse_mergeblockratio(
1929        self, no: bool = False, default: bool = False
1930    ) -> exp.MergeBlockRatioProperty:
1931        if self._match(TokenType.EQ):
1932            return self.expression(
1933                exp.MergeBlockRatioProperty,
1934                this=self._parse_number(),
1935                percent=self._match(TokenType.PERCENT),
1936            )
1937
1938        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
1939
1940    def _parse_datablocksize(
1941        self,
1942        default: t.Optional[bool] = None,
1943        minimum: t.Optional[bool] = None,
1944        maximum: t.Optional[bool] = None,
1945    ) -> exp.DataBlocksizeProperty:
1946        self._match(TokenType.EQ)
1947        size = self._parse_number()
1948
1949        units = None
1950        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1951            units = self._prev.text
1952
1953        return self.expression(
1954            exp.DataBlocksizeProperty,
1955            size=size,
1956            units=units,
1957            default=default,
1958            minimum=minimum,
1959            maximum=maximum,
1960        )
1961
1962    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
1963        self._match(TokenType.EQ)
1964        always = self._match_text_seq("ALWAYS")
1965        manual = self._match_text_seq("MANUAL")
1966        never = self._match_text_seq("NEVER")
1967        default = self._match_text_seq("DEFAULT")
1968
1969        autotemp = None
1970        if self._match_text_seq("AUTOTEMP"):
1971            autotemp = self._parse_schema()
1972
1973        return self.expression(
1974            exp.BlockCompressionProperty,
1975            always=always,
1976            manual=manual,
1977            never=never,
1978            default=default,
1979            autotemp=autotemp,
1980        )
1981
1982    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
1983        index = self._index
1984        no = self._match_text_seq("NO")
1985        concurrent = self._match_text_seq("CONCURRENT")
1986
1987        if not self._match_text_seq("ISOLATED", "LOADING"):
1988            self._retreat(index)
1989            return None
1990
1991        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
1992        return self.expression(
1993            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
1994        )
1995
1996    def _parse_locking(self) -> exp.LockingProperty:
1997        if self._match(TokenType.TABLE):
1998            kind = "TABLE"
1999        elif self._match(TokenType.VIEW):
2000            kind = "VIEW"
2001        elif self._match(TokenType.ROW):
2002            kind = "ROW"
2003        elif self._match_text_seq("DATABASE"):
2004            kind = "DATABASE"
2005        else:
2006            kind = None
2007
2008        if kind in ("DATABASE", "TABLE", "VIEW"):
2009            this = self._parse_table_parts()
2010        else:
2011            this = None
2012
2013        if self._match(TokenType.FOR):
2014            for_or_in = "FOR"
2015        elif self._match(TokenType.IN):
2016            for_or_in = "IN"
2017        else:
2018            for_or_in = None
2019
2020        if self._match_text_seq("ACCESS"):
2021            lock_type = "ACCESS"
2022        elif self._match_texts(("EXCL", "EXCLUSIVE")):
2023            lock_type = "EXCLUSIVE"
2024        elif self._match_text_seq("SHARE"):
2025            lock_type = "SHARE"
2026        elif self._match_text_seq("READ"):
2027            lock_type = "READ"
2028        elif self._match_text_seq("WRITE"):
2029            lock_type = "WRITE"
2030        elif self._match_text_seq("CHECKSUM"):
2031            lock_type = "CHECKSUM"
2032        else:
2033            lock_type = None
2034
2035        override = self._match_text_seq("OVERRIDE")
2036
2037        return self.expression(
2038            exp.LockingProperty,
2039            this=this,
2040            kind=kind,
2041            for_or_in=for_or_in,
2042            lock_type=lock_type,
2043            override=override,
2044        )
2045
2046    def _parse_partition_by(self) -> t.List[exp.Expression]:
2047        if self._match(TokenType.PARTITION_BY):
2048            return self._parse_csv(self._parse_conjunction)
2049        return []
2050
2051    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
2052        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
2053            if self._match_text_seq("MINVALUE"):
2054                return exp.var("MINVALUE")
2055            if self._match_text_seq("MAXVALUE"):
2056                return exp.var("MAXVALUE")
2057            return self._parse_bitwise()
2058
2059        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
2060        expression = None
2061        from_expressions = None
2062        to_expressions = None
2063
2064        if self._match(TokenType.IN):
2065            this = self._parse_wrapped_csv(self._parse_bitwise)
2066        elif self._match(TokenType.FROM):
2067            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
2068            self._match_text_seq("TO")
2069            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
2070        elif self._match_text_seq("WITH", "(", "MODULUS"):
2071            this = self._parse_number()
2072            self._match_text_seq(",", "REMAINDER")
2073            expression = self._parse_number()
2074            self._match_r_paren()
2075        else:
2076            self.raise_error("Failed to parse partition bound spec.")
2077
2078        return self.expression(
2079            exp.PartitionBoundSpec,
2080            this=this,
2081            expression=expression,
2082            from_expressions=from_expressions,
2083            to_expressions=to_expressions,
2084        )
2085
2086    # https://www.postgresql.org/docs/current/sql-createtable.html
2087    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
2088        if not self._match_text_seq("OF"):
2089            self._retreat(self._index - 1)
2090            return None
2091
2092        this = self._parse_table(schema=True)
2093
2094        if self._match(TokenType.DEFAULT):
2095            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
2096        elif self._match_text_seq("FOR", "VALUES"):
2097            expression = self._parse_partition_bound_spec()
2098        else:
2099            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")
2100
2101        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
2102
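    # Illustrative example (not part of the original module): the PostgreSQL
    # partition syntax from the docs linked above, e.g.:
    #
    #     import sqlglot
    #     sqlglot.parse_one(
    #         "CREATE TABLE p PARTITION OF t FOR VALUES FROM (1) TO (10)",
    #         read="postgres",
    #     )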
2103    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
2104        self._match(TokenType.EQ)
2105        return self.expression(
2106            exp.PartitionedByProperty,
2107            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
2108        )
2109
2110    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
2111        if self._match_text_seq("AND", "STATISTICS"):
2112            statistics = True
2113        elif self._match_text_seq("AND", "NO", "STATISTICS"):
2114            statistics = False
2115        else:
2116            statistics = None
2117
2118        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
2119
2120    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
2121        if self._match_text_seq("SQL"):
2122            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
2123        return None
2124
2125    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
2126        if self._match_text_seq("SQL", "DATA"):
2127            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
2128        return None
2129
2130    def _parse_no_property(self) -> t.Optional[exp.Expression]:
2131        if self._match_text_seq("PRIMARY", "INDEX"):
2132            return exp.NoPrimaryIndexProperty()
2133        if self._match_text_seq("SQL"):
2134            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
2135        return None
2136
2137    def _parse_on_property(self) -> t.Optional[exp.Expression]:
2138        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
2139            return exp.OnCommitProperty()
2140        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
2141            return exp.OnCommitProperty(delete=True)
2142        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))
2143
2144    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
2145        if self._match_text_seq("SQL", "DATA"):
2146            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
2147        return None
2148
2149    def _parse_distkey(self) -> exp.DistKeyProperty:
2150        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
2151
2152    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
2153        table = self._parse_table(schema=True)
2154
2155        options = []
2156        while self._match_texts(("INCLUDING", "EXCLUDING")):
2157            this = self._prev.text.upper()
2158
2159            id_var = self._parse_id_var()
2160            if not id_var:
2161                return None
2162
2163            options.append(
2164                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
2165            )
2166
2167        return self.expression(exp.LikeProperty, this=table, expressions=options)
2168
2169    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
2170        return self.expression(
2171            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
2172        )
2173
2174    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
2175        self._match(TokenType.EQ)
2176        return self.expression(
2177            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
2178        )
2179
2180    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
2181        self._match_text_seq("WITH", "CONNECTION")
2182        return self.expression(
2183            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
2184        )
2185
2186    def _parse_returns(self) -> exp.ReturnsProperty:
2187        value: t.Optional[exp.Expression]
2188        is_table = self._match(TokenType.TABLE)
2189
2190        if is_table:
2191            if self._match(TokenType.LT):
2192                value = self.expression(
2193                    exp.Schema,
2194                    this="TABLE",
2195                    expressions=self._parse_csv(self._parse_struct_types),
2196                )
2197                if not self._match(TokenType.GT):
2198                    self.raise_error("Expecting >")
2199            else:
2200                value = self._parse_schema(exp.var("TABLE"))
2201        else:
2202            value = self._parse_types()
2203
2204        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
2205
2206    def _parse_describe(self) -> exp.Describe:
2207        kind = self._match_set(self.CREATABLES) and self._prev.text
2208        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
2209        if not self._match_set(self.ID_VAR_TOKENS, advance=False):
2210            style = None
2211            self._retreat(self._index - 1)
2212        this = self._parse_table(schema=True)
2213        properties = self._parse_properties()
2214        expressions = properties.expressions if properties else None
2215        return self.expression(
2216            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
2217        )
2218
2219    def _parse_insert(self) -> exp.Insert:
2220        comments = ensure_list(self._prev_comments)
2221        hint = self._parse_hint()
2222        overwrite = self._match(TokenType.OVERWRITE)
2223        ignore = self._match(TokenType.IGNORE)
2224        local = self._match_text_seq("LOCAL")
2225        alternative = None
2226        is_function = None
2227
2228        if self._match_text_seq("DIRECTORY"):
2229            this: t.Optional[exp.Expression] = self.expression(
2230                exp.Directory,
2231                this=self._parse_var_or_string(),
2232                local=local,
2233                row_format=self._parse_row_format(match_row=True),
2234            )
2235        else:
2236            if self._match(TokenType.OR):
2237                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
2238
2239            self._match(TokenType.INTO)
2240            comments += ensure_list(self._prev_comments)
2241            self._match(TokenType.TABLE)
2242            is_function = self._match(TokenType.FUNCTION)
2243
2244            this = (
2245                self._parse_table(schema=True, parse_partition=True)
2246                if not is_function
2247                else self._parse_function()
2248            )
2249
2250        returning = self._parse_returning()
2251
2252        return self.expression(
2253            exp.Insert,
2254            comments=comments,
2255            hint=hint,
2256            is_function=is_function,
2257            this=this,
2258            stored=self._match_text_seq("STORED") and self._parse_stored(),
2259            by_name=self._match_text_seq("BY", "NAME"),
2260            exists=self._parse_exists(),
2261            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
2262            and self._parse_conjunction(),
2263            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
2264            conflict=self._parse_on_conflict(),
2265            returning=returning or self._parse_returning(),
2266            overwrite=overwrite,
2267            alternative=alternative,
2268            ignore=ignore,
2269        )
2270
2271    def _parse_kill(self) -> exp.Kill:
2272        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None
2273
2274        return self.expression(
2275            exp.Kill,
2276            this=self._parse_primary(),
2277            kind=kind,
2278        )
2279
2280    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
2281        conflict = self._match_text_seq("ON", "CONFLICT")
2282        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")
2283
2284        if not conflict and not duplicate:
2285            return None
2286
2287        conflict_keys = None
2288        constraint = None
2289
2290        if conflict:
2291            if self._match_text_seq("ON", "CONSTRAINT"):
2292                constraint = self._parse_id_var()
2293            elif self._match(TokenType.L_PAREN):
2294                conflict_keys = self._parse_csv(self._parse_id_var)
2295                self._match_r_paren()
2296
2297        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
2298        if self._prev.token_type == TokenType.UPDATE:
2299            self._match(TokenType.SET)
2300            expressions = self._parse_csv(self._parse_equality)
2301        else:
2302            expressions = None
2303
2304        return self.expression(
2305            exp.OnConflict,
2306            duplicate=duplicate,
2307            expressions=expressions,
2308            action=action,
2309            conflict_keys=conflict_keys,
2310            constraint=constraint,
2311        )
2312
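    # Illustrative example (not part of the original module): both the
    # PostgreSQL and MySQL spellings should funnel into exp.OnConflict, e.g.:
    #
    #     import sqlglot
    #     sqlglot.parse_one(
    #         "INSERT INTO t (x) VALUES (1) ON CONFLICT (x) DO NOTHING",
    #         read="postgres",
    #     )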
2313    def _parse_returning(self) -> t.Optional[exp.Returning]:
2314        if not self._match(TokenType.RETURNING):
2315            return None
2316        return self.expression(
2317            exp.Returning,
2318            expressions=self._parse_csv(self._parse_expression),
2319            into=self._match(TokenType.INTO) and self._parse_table_part(),
2320        )
2321
2322    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
2323        if not self._match(TokenType.FORMAT):
2324            return None
2325        return self._parse_row_format()
2326
2327    def _parse_row_format(
2328        self, match_row: bool = False
2329    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
2330        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
2331            return None
2332
2333        if self._match_text_seq("SERDE"):
2334            this = self._parse_string()
2335
2336            serde_properties = None
2337            if self._match(TokenType.SERDE_PROPERTIES):
2338                serde_properties = self.expression(
2339                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
2340                )
2341
2342            return self.expression(
2343                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
2344            )
2345
2346        self._match_text_seq("DELIMITED")
2347
2348        kwargs = {}
2349
2350        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
2351            kwargs["fields"] = self._parse_string()
2352            if self._match_text_seq("ESCAPED", "BY"):
2353                kwargs["escaped"] = self._parse_string()
2354        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
2355            kwargs["collection_items"] = self._parse_string()
2356        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
2357            kwargs["map_keys"] = self._parse_string()
2358        if self._match_text_seq("LINES", "TERMINATED", "BY"):
2359            kwargs["lines"] = self._parse_string()
2360        if self._match_text_seq("NULL", "DEFINED", "AS"):
2361            kwargs["null"] = self._parse_string()
2362
2363        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
2364
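    # Illustrative example (not part of the original module): Hive-style
    # DELIMITED clauses map onto the kwargs collected above, e.g.:
    #
    #     import sqlglot
    #     sqlglot.parse_one(
    #         "CREATE TABLE t (x INT) ROW FORMAT DELIMITED "
    #         "FIELDS TERMINATED BY ',' LINES TERMINATED BY '\\n'",
    #         read="hive",
    #     )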
2365    def _parse_load(self) -> exp.LoadData | exp.Command:
2366        if self._match_text_seq("DATA"):
2367            local = self._match_text_seq("LOCAL")
2368            self._match_text_seq("INPATH")
2369            inpath = self._parse_string()
2370            overwrite = self._match(TokenType.OVERWRITE)
2371            self._match_pair(TokenType.INTO, TokenType.TABLE)
2372
2373            return self.expression(
2374                exp.LoadData,
2375                this=self._parse_table(schema=True),
2376                local=local,
2377                overwrite=overwrite,
2378                inpath=inpath,
2379                partition=self._parse_partition(),
2380                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
2381                serde=self._match_text_seq("SERDE") and self._parse_string(),
2382            )
2383        return self._parse_as_command(self._prev)
2384
2385    def _parse_delete(self) -> exp.Delete:
2386        # This handles MySQL's "Multiple-Table Syntax"
2387        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
2388        tables = None
2389        comments = self._prev_comments
2390        if not self._match(TokenType.FROM, advance=False):
2391            tables = self._parse_csv(self._parse_table) or None
2392
2393        returning = self._parse_returning()
2394
2395        return self.expression(
2396            exp.Delete,
2397            comments=comments,
2398            tables=tables,
2399            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
2400            using=self._match(TokenType.USING) and self._parse_table(joins=True),
2401            where=self._parse_where(),
2402            returning=returning or self._parse_returning(),
2403            limit=self._parse_limit(),
2404        )
2405
2406    def _parse_update(self) -> exp.Update:
2407        comments = self._prev_comments
2408        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
2409        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
2410        returning = self._parse_returning()
2411        return self.expression(
2412            exp.Update,
2413            comments=comments,
2414            **{  # type: ignore
2415                "this": this,
2416                "expressions": expressions,
2417                "from": self._parse_from(joins=True),
2418                "where": self._parse_where(),
2419                "returning": returning or self._parse_returning(),
2420                "order": self._parse_order(),
2421                "limit": self._parse_limit(),
2422            },
2423        )
2424
2425    def _parse_uncache(self) -> exp.Uncache:
2426        if not self._match(TokenType.TABLE):
2427            self.raise_error("Expecting TABLE after UNCACHE")
2428
2429        return self.expression(
2430            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
2431        )
2432
2433    def _parse_cache(self) -> exp.Cache:
2434        lazy = self._match_text_seq("LAZY")
2435        self._match(TokenType.TABLE)
2436        table = self._parse_table(schema=True)
2437
2438        options = []
2439        if self._match_text_seq("OPTIONS"):
2440            self._match_l_paren()
2441            k = self._parse_string()
2442            self._match(TokenType.EQ)
2443            v = self._parse_string()
2444            options = [k, v]
2445            self._match_r_paren()
2446
2447        self._match(TokenType.ALIAS)
2448        return self.expression(
2449            exp.Cache,
2450            this=table,
2451            lazy=lazy,
2452            options=options,
2453            expression=self._parse_select(nested=True),
2454        )
2455
2456    def _parse_partition(self) -> t.Optional[exp.Partition]:
2457        if not self._match(TokenType.PARTITION):
2458            return None
2459
2460        return self.expression(
2461            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
2462        )
2463
2464    def _parse_value(self) -> exp.Tuple:
2465        if self._match(TokenType.L_PAREN):
2466            expressions = self._parse_csv(self._parse_expression)
2467            self._match_r_paren()
2468            return self.expression(exp.Tuple, expressions=expressions)
2469
2470        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
2471        return self.expression(exp.Tuple, expressions=[self._parse_expression()])
2472
2473    def _parse_projections(self) -> t.List[exp.Expression]:
2474        return self._parse_expressions()
2475
2476    def _parse_select(
2477        self,
2478        nested: bool = False,
2479        table: bool = False,
2480        parse_subquery_alias: bool = True,
2481        parse_set_operation: bool = True,
2482    ) -> t.Optional[exp.Expression]:
2483        cte = self._parse_with()
2484
2485        if cte:
2486            this = self._parse_statement()
2487
2488            if not this:
2489                self.raise_error("Failed to parse any statement following CTE")
2490                return cte
2491
2492            if "with" in this.arg_types:
2493                this.set("with", cte)
2494            else:
2495                self.raise_error(f"{this.key} does not support CTE")
2496                this = cte
2497
2498            return this
2499
2500        # DuckDB supports a leading FROM clause, e.g. "FROM x" or "FROM x SELECT ..."
2501        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None
2502
2503        if self._match(TokenType.SELECT):
2504            comments = self._prev_comments
2505
2506            hint = self._parse_hint()
2507            all_ = self._match(TokenType.ALL)
2508            distinct = self._match_set(self.DISTINCT_TOKENS)
2509
2510            kind = (
2511                self._match(TokenType.ALIAS)
2512                and self._match_texts(("STRUCT", "VALUE"))
2513                and self._prev.text.upper()
2514            )
2515
2516            if distinct:
2517                distinct = self.expression(
2518                    exp.Distinct,
2519                    on=self._parse_value() if self._match(TokenType.ON) else None,
2520                )
2521
2522            if all_ and distinct:
2523                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")
2524
2525            limit = self._parse_limit(top=True)
2526            projections = self._parse_projections()
2527
2528            this = self.expression(
2529                exp.Select,
2530                kind=kind,
2531                hint=hint,
2532                distinct=distinct,
2533                expressions=projections,
2534                limit=limit,
2535            )
2536            this.comments = comments
2537
2538            into = self._parse_into()
2539            if into:
2540                this.set("into", into)
2541
2542            if not from_:
2543                from_ = self._parse_from()
2544
2545            if from_:
2546                this.set("from", from_)
2547
2548            this = self._parse_query_modifiers(this)
2549        elif (table or nested) and self._match(TokenType.L_PAREN):
2550            if self._match(TokenType.PIVOT):
2551                this = self._parse_simplified_pivot()
2552            elif self._match(TokenType.FROM):
2553                this = exp.select("*").from_(
2554                    t.cast(exp.From, self._parse_from(skip_from_token=True))
2555                )
2556            else:
2557                this = (
2558                    self._parse_table()
2559                    if table
2560                    else self._parse_select(nested=True, parse_set_operation=False)
2561                )
2562                this = self._parse_query_modifiers(self._parse_set_operations(this))
2563
2564            self._match_r_paren()
2565
2566            # We return early here so that the UNION isn't attached to the subquery by the
2567            # following call to _parse_set_operations, but instead becomes the parent node
2568            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
2569        elif self._match(TokenType.VALUES, advance=False):
2570            this = self._parse_derived_table_values()
2571        elif from_:
2572            this = exp.select("*").from_(from_.this, copy=False)
2573        else:
2574            this = None
2575
2576        if parse_set_operation:
2577            return self._parse_set_operations(this)
2578        return this
2579
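    # Illustrative example (not part of the original module): DuckDB's leading
    # FROM syntax is normalized into an ordinary SELECT, roughly:
    #
    #     import sqlglot
    #     sqlglot.parse_one("FROM t", read="duckdb").sql()  # 'SELECT * FROM t'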
2580    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
2581        if not skip_with_token and not self._match(TokenType.WITH):
2582            return None
2583
2584        comments = self._prev_comments
2585        recursive = self._match(TokenType.RECURSIVE)
2586
2587        expressions = []
2588        while True:
2589            expressions.append(self._parse_cte())
2590
2591            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
2592                break
2593            else:
2594                self._match(TokenType.WITH)
2595
2596        return self.expression(
2597            exp.With, comments=comments, expressions=expressions, recursive=recursive
2598        )
2599
2600    def _parse_cte(self) -> exp.CTE:
2601        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
2602        if not alias or not alias.this:
2603            self.raise_error("Expected CTE to have alias")
2604
2605        self._match(TokenType.ALIAS)
2606
2607        if self._match_text_seq("NOT", "MATERIALIZED"):
2608            materialized = False
2609        elif self._match_text_seq("MATERIALIZED"):
2610            materialized = True
2611        else:
2612            materialized = None
2613
2614        return self.expression(
2615            exp.CTE,
2616            this=self._parse_wrapped(self._parse_statement),
2617            alias=alias,
2618            materialized=materialized,
2619        )
2620
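    # Illustrative example (not part of the original module): the optional
    # materialization hint is stored on the exp.CTE node, e.g.:
    #
    #     import sqlglot
    #     sqlglot.parse_one(
    #         "WITH c AS MATERIALIZED (SELECT 1) SELECT * FROM c",
    #         read="postgres",
    #     )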
2621    def _parse_table_alias(
2622        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
2623    ) -> t.Optional[exp.TableAlias]:
2624        any_token = self._match(TokenType.ALIAS)
2625        alias = (
2626            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
2627            or self._parse_string_as_identifier()
2628        )
2629
2630        index = self._index
2631        if self._match(TokenType.L_PAREN):
2632            columns = self._parse_csv(self._parse_function_parameter)
2633            self._match_r_paren() if columns else self._retreat(index)
2634        else:
2635            columns = None
2636
2637        if not alias and not columns:
2638            return None
2639
2640        return self.expression(exp.TableAlias, this=alias, columns=columns)
2641
2642    def _parse_subquery(
2643        self, this: t.Optional[exp.Expression], parse_alias: bool = True
2644    ) -> t.Optional[exp.Subquery]:
2645        if not this:
2646            return None
2647
2648        return self.expression(
2649            exp.Subquery,
2650            this=this,
2651            pivots=self._parse_pivots(),
2652            alias=self._parse_table_alias() if parse_alias else None,
2653        )
2654
2655    def _implicit_unnests_to_explicit(self, this: E) -> E:
2656        from sqlglot.optimizer.normalize_identifiers import (
2657            normalize_identifiers as _norm,
2658        )
2659
2660        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
2661        for i, join in enumerate(this.args.get("joins") or []):
2662            table = join.this
2663            normalized_table = table.copy()
2664            normalized_table.meta["maybe_column"] = True
2665            normalized_table = _norm(normalized_table, dialect=self.dialect)
2666
2667            if isinstance(table, exp.Table) and not join.args.get("on"):
2668                if normalized_table.parts[0].name in refs:
2669                    table_as_column = table.to_column()
2670                    unnest = exp.Unnest(expressions=[table_as_column])
2671
2672                    # Table.to_column creates a parent Alias node that we want to convert to
2673                    # a TableAlias and attach to the Unnest, so it matches the parser's output
2674                    if isinstance(table.args.get("alias"), exp.TableAlias):
2675                        table_as_column.replace(table_as_column.this)
2676                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)
2677
2678                    table.replace(unnest)
2679
2680            refs.add(normalized_table.alias_or_name)
2681
2682        return this
2683
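    # Illustrative example (not part of the original module): in dialects with
    # SUPPORTS_IMPLICIT_UNNEST (e.g. BigQuery), a comma join against a column
    # reference is rewritten into an explicit UNNEST, roughly:
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT x FROM t, t.arr", read="bigquery")
    #     # the second join target is expected to become UNNEST(t.arr)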
2684    def _parse_query_modifiers(
2685        self, this: t.Optional[exp.Expression]
2686    ) -> t.Optional[exp.Expression]:
2687        if isinstance(this, (exp.Query, exp.Table)):
2688            for join in self._parse_joins():
2689                this.append("joins", join)
2690            for lateral in iter(self._parse_lateral, None):
2691                this.append("laterals", lateral)
2692
2693            while True:
2694                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
2695                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
2696                    key, expression = parser(self)
2697
2698                    if expression:
2699                        this.set(key, expression)
2700                        if key == "limit":
2701                            offset = expression.args.pop("offset", None)
2702
2703                            if offset:
2704                                offset = exp.Offset(expression=offset)
2705                                this.set("offset", offset)
2706
2707                                limit_by_expressions = expression.expressions
2708                                expression.set("expressions", None)
2709                                offset.set("expressions", limit_by_expressions)
2710                        continue
2711                break
2712
2713        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
2714            this = self._implicit_unnests_to_explicit(this)
2715
2716        return this
2717
2718    def _parse_hint(self) -> t.Optional[exp.Hint]:
2719        if self._match(TokenType.HINT):
2720            hints = []
2721            for hint in iter(
2722                lambda: self._parse_csv(
2723                    lambda: self._parse_function() or self._parse_var(upper=True)
2724                ),
2725                [],
2726            ):
2727                hints.extend(hint)
2728
2729            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2730                self.raise_error("Expected */ after HINT")
2731
2732            return self.expression(exp.Hint, expressions=hints)
2733
2734        return None
2735
2736    def _parse_into(self) -> t.Optional[exp.Into]:
2737        if not self._match(TokenType.INTO):
2738            return None
2739
2740        temp = self._match(TokenType.TEMPORARY)
2741        unlogged = self._match_text_seq("UNLOGGED")
2742        self._match(TokenType.TABLE)
2743
2744        return self.expression(
2745            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2746        )
2747
2748    def _parse_from(
2749        self, joins: bool = False, skip_from_token: bool = False
2750    ) -> t.Optional[exp.From]:
2751        if not skip_from_token and not self._match(TokenType.FROM):
2752            return None
2753
2754        return self.expression(
2755            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
2756        )
2757
2758    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
2759        return self.expression(
2760            exp.MatchRecognizeMeasure,
2761            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
2762            this=self._parse_expression(),
2763        )
2764
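    # e.g. (illustrative) a row-pattern query of the shape parsed below:
    #   SELECT * FROM t MATCH_RECOGNIZE (
    #     PARTITION BY sym ORDER BY ts
    #     MEASURES FIRST(a.price) AS start_price
    #     ONE ROW PER MATCH
    #     AFTER MATCH SKIP PAST LAST ROW
    #     PATTERN (A B+)
    #     DEFINE B AS price > PREV(price)
    #   ) AS mr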
2765    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
2766        if not self._match(TokenType.MATCH_RECOGNIZE):
2767            return None
2768
2769        self._match_l_paren()
2770
2771        partition = self._parse_partition_by()
2772        order = self._parse_order()
2773
2774        measures = (
2775            self._parse_csv(self._parse_match_recognize_measure)
2776            if self._match_text_seq("MEASURES")
2777            else None
2778        )
2779
2780        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2781            rows = exp.var("ONE ROW PER MATCH")
2782        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2783            text = "ALL ROWS PER MATCH"
2784            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2785                text += " SHOW EMPTY MATCHES"
2786            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2787                text += " OMIT EMPTY MATCHES"
2788            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2789                text += " WITH UNMATCHED ROWS"
2790            rows = exp.var(text)
2791        else:
2792            rows = None
2793
2794        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2795            text = "AFTER MATCH SKIP"
2796            if self._match_text_seq("PAST", "LAST", "ROW"):
2797                text += " PAST LAST ROW"
2798            elif self._match_text_seq("TO", "NEXT", "ROW"):
2799                text += " TO NEXT ROW"
2800            elif self._match_text_seq("TO", "FIRST"):
2801                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2802            elif self._match_text_seq("TO", "LAST"):
2803                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2804            after = exp.var(text)
2805        else:
2806            after = None
2807
2808        if self._match_text_seq("PATTERN"):
2809            self._match_l_paren()
2810
2811            if not self._curr:
2812                self.raise_error("Expecting )", self._curr)
2813
2814            paren = 1
2815            start = self._curr
2816
2817            while self._curr and paren > 0:
2818                if self._curr.token_type == TokenType.L_PAREN:
2819                    paren += 1
2820                if self._curr.token_type == TokenType.R_PAREN:
2821                    paren -= 1
2822
2823                end = self._prev
2824                self._advance()
2825
2826            if paren > 0:
2827                self.raise_error("Expecting )", self._curr)
2828
2829            pattern = exp.var(self._find_sql(start, end))
2830        else:
2831            pattern = None
2832
2833        define = (
2834            self._parse_csv(self._parse_name_as_expression)
2835            if self._match_text_seq("DEFINE")
2836            else None
2837        )
2838
2839        self._match_r_paren()
2840
2841        return self.expression(
2842            exp.MatchRecognize,
2843            partition_by=partition,
2844            order=order,
2845            measures=measures,
2846            rows=rows,
2847            after=after,
2848            pattern=pattern,
2849            define=define,
2850            alias=self._parse_table_alias(),
2851        )
2852
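    # e.g. (illustrative) shapes accepted below, all yielding exp.Lateral:
    #   SELECT * FROM t CROSS APPLY fn(t.x)                 -- cross_apply=True
    #   SELECT * FROM t OUTER APPLY fn(t.x)                 -- cross_apply=False
    #   SELECT * FROM t LATERAL VIEW EXPLODE(t.xs) u AS x   -- view=True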
2853    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
2854        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)  # True for CROSS APPLY
2855        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
2856            cross_apply = False  # False for OUTER APPLY; None if neither variant matched
2857
2858        if cross_apply is not None:
2859            this = self._parse_select(table=True)
2860            view = None
2861            outer = None
2862        elif self._match(TokenType.LATERAL):
2863            this = self._parse_select(table=True)
2864            view = self._match(TokenType.VIEW)
2865            outer = self._match(TokenType.OUTER)
2866        else:
2867            return None
2868
2869        if not this:
2870            this = (
2871                self._parse_unnest()
2872                or self._parse_function()
2873                or self._parse_id_var(any_token=False)
2874            )
2875
2876            while self._match(TokenType.DOT):
2877                this = exp.Dot(
2878                    this=this,
2879                    expression=self._parse_function() or self._parse_id_var(any_token=False),
2880                )
2881
2882        if view:
2883            table = self._parse_id_var(any_token=False)
2884            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
2885            table_alias: t.Optional[exp.TableAlias] = self.expression(
2886                exp.TableAlias, this=table, columns=columns
2887            )
2888        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
2889            # We move the alias from the lateral's child node to the lateral itself
2890            table_alias = this.args["alias"].pop()
2891        else:
2892            table_alias = self._parse_table_alias()
2893
2894        return self.expression(
2895            exp.Lateral,
2896            this=this,
2897            view=view,
2898            outer=outer,
2899            alias=table_alias,
2900            cross_apply=cross_apply,
2901        )
2902
2903    def _parse_join_parts(
2904        self,
2905    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2906        return (
2907            self._match_set(self.JOIN_METHODS) and self._prev,
2908            self._match_set(self.JOIN_SIDES) and self._prev,
2909            self._match_set(self.JOIN_KINDS) and self._prev,
2910        )
2911
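    # e.g. (illustrative): "t1 LEFT OUTER JOIN t2 ON t1.id = t2.id" yields
    # side="LEFT", kind="OUTER" and an "on" condition, while a bare comma
    # ("FROM t1, t2") becomes a plain exp.Join with no kind at all.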
2912    def _parse_join(
2913        self, skip_join_token: bool = False, parse_bracket: bool = False
2914    ) -> t.Optional[exp.Join]:
2915        if self._match(TokenType.COMMA):
2916            return self.expression(exp.Join, this=self._parse_table())
2917
2918        index = self._index
2919        method, side, kind = self._parse_join_parts()
2920        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
2921        join = self._match(TokenType.JOIN)
2922
2923        if not skip_join_token and not join:
2924            self._retreat(index)
2925            kind = None
2926            method = None
2927            side = None
2928
2929        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
2930        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)
2931
2932        if not skip_join_token and not join and not outer_apply and not cross_apply:
2933            return None
2934
2935        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
2936
2937        if method:
2938            kwargs["method"] = method.text
2939        if side:
2940            kwargs["side"] = side.text
2941        if kind:
2942            kwargs["kind"] = kind.text
2943        if hint:
2944            kwargs["hint"] = hint
2945
2946        if self._match(TokenType.MATCH_CONDITION):
2947            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)
2948
2949        if self._match(TokenType.ON):
2950            kwargs["on"] = self._parse_conjunction()
2951        elif self._match(TokenType.USING):
2952            kwargs["using"] = self._parse_wrapped_id_vars()
2953        elif not isinstance(kwargs["this"], exp.Unnest) and not (
2954            kind and kind.token_type == TokenType.CROSS
2955        ):
2956            index = self._index
2957            joins: t.Optional[list] = list(self._parse_joins())
2958
2959            if joins and self._match(TokenType.ON):
2960                kwargs["on"] = self._parse_conjunction()
2961            elif joins and self._match(TokenType.USING):
2962                kwargs["using"] = self._parse_wrapped_id_vars()
2963            else:
2964                joins = None
2965                self._retreat(index)
2966
2967            kwargs["this"].set("joins", joins if joins else None)
2968
2969        comments = [c for token in (method, side, kind) if token for c in token.comments]
2970        return self.expression(exp.Join, comments=comments, **kwargs)
2971
2972    def _parse_opclass(self) -> t.Optional[exp.Expression]:
2973        this = self._parse_conjunction()
2974
2975        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
2976            return this
2977
2978        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
2979            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())
2980
2981        return this
2982
2983    def _parse_index_params(self) -> exp.IndexParameters:
2984        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None
2985
2986        if self._match(TokenType.L_PAREN, advance=False):
2987            columns = self._parse_wrapped_csv(self._parse_with_operator)
2988        else:
2989            columns = None
2990
2991        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
2992        partition_by = self._parse_partition_by()
2993        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
2994        tablespace = (
2995            self._parse_var(any_token=True)
2996            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
2997            else None
2998        )
2999        where = self._parse_where()
3000
3001        return self.expression(
3002            exp.IndexParameters,
3003            using=using,
3004            columns=columns,
3005            include=include,
3006            partition_by=partition_by,
3007            where=where,
3008            with_storage=with_storage,
3009            tablespace=tablespace,
3010        )
3011
3012    def _parse_index(
3013        self,
3014        index: t.Optional[exp.Expression] = None,
3015    ) -> t.Optional[exp.Index]:
3016        if index:
3017            unique = None
3018            primary = None
3019            amp = None
3020
3021            self._match(TokenType.ON)
3022            self._match(TokenType.TABLE)  # hive
3023            table = self._parse_table_parts(schema=True)
3024        else:
3025            unique = self._match(TokenType.UNIQUE)
3026            primary = self._match_text_seq("PRIMARY")
3027            amp = self._match_text_seq("AMP")
3028
3029            if not self._match(TokenType.INDEX):
3030                return None
3031
3032            index = self._parse_id_var()
3033            table = None
3034
3035        params = self._parse_index_params()
3036
3037        return self.expression(
3038            exp.Index,
3039            this=index,
3040            table=table,
3041            unique=unique,
3042            primary=primary,
3043            amp=amp,
3044            params=params,
3045        )
3046
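    # e.g. (illustrative): T-SQL "FROM t WITH (NOLOCK)" produces a WithTableHint,
    # whereas MySQL "FROM t USE INDEX FOR JOIN (idx)" produces an IndexTableHint.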
3047    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
3048        hints: t.List[exp.Expression] = []
3049        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
3050            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
3051            hints.append(
3052                self.expression(
3053                    exp.WithTableHint,
3054                    expressions=self._parse_csv(
3055                        lambda: self._parse_function() or self._parse_var(any_token=True)
3056                    ),
3057                )
3058            )
3059            self._match_r_paren()
3060        else:
3061            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
3062            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
3063                hint = exp.IndexTableHint(this=self._prev.text.upper())
3064
3065                self._match_texts(("INDEX", "KEY"))
3066                if self._match(TokenType.FOR):
3067                    hint.set("target", self._advance_any() and self._prev.text.upper())
3068
3069                hint.set("expressions", self._parse_wrapped_id_vars())
3070                hints.append(hint)
3071
3072        return hints or None
3073
3074    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
3075        return (
3076            (not schema and self._parse_function(optional_parens=False))
3077            or self._parse_id_var(any_token=False)
3078            or self._parse_string_as_identifier()
3079            or self._parse_placeholder()
3080        )
3081
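    # e.g. (illustrative): "catalog.db.tbl" fills all three parts, any further
    # dotted parts are nested as exp.Dot, and T-SQL's "a..b" leaves an empty
    # string where the omitted db part would be.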
3082    def _parse_table_parts(
3083        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
3084    ) -> exp.Table:
3085        catalog = None
3086        db = None
3087        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)
3088
3089        while self._match(TokenType.DOT):
3090            if catalog:
3091                # This allows nesting the table in arbitrarily many dot expressions if needed
3092                table = self.expression(
3093                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
3094                )
3095            else:
3096                catalog = db
3097                db = table
3098                # An empty string stands in for the db part omitted in T-SQL's "FROM a..b" syntax
3099                table = self._parse_table_part(schema=schema) or ""
3100
3101        if (
3102            wildcard
3103            and self._is_connected()
3104            and (isinstance(table, exp.Identifier) or not table)
3105            and self._match(TokenType.STAR)
3106        ):
3107            if isinstance(table, exp.Identifier):
3108                table.args["this"] += "*"
3109            else:
3110                table = exp.Identifier(this="*")
3111
3112        if is_db_reference:
3113            catalog = db
3114            db = table
3115            table = None
3116
3117        if not table and not is_db_reference:
3118            self.raise_error(f"Expected table name but got {self._curr}")
3119        if not db and is_db_reference:
3120            self.raise_error(f"Expected database name but got {self._curr}")
3121
3122        return self.expression(
3123            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
3124        )
3125
3126    def _parse_table(
3127        self,
3128        schema: bool = False,
3129        joins: bool = False,
3130        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
3131        parse_bracket: bool = False,
3132        is_db_reference: bool = False,
3133        parse_partition: bool = False,
3134    ) -> t.Optional[exp.Expression]:
3135        lateral = self._parse_lateral()
3136        if lateral:
3137            return lateral
3138
3139        unnest = self._parse_unnest()
3140        if unnest:
3141            return unnest
3142
3143        values = self._parse_derived_table_values()
3144        if values:
3145            return values
3146
3147        subquery = self._parse_select(table=True)
3148        if subquery:
3149            if not subquery.args.get("pivots"):
3150                subquery.set("pivots", self._parse_pivots())
3151            return subquery
3152
3153        bracket = parse_bracket and self._parse_bracket(None)
3154        bracket = self.expression(exp.Table, this=bracket) if bracket else None
3155
3156        only = self._match(TokenType.ONLY)
3157
3158        this = t.cast(
3159            exp.Expression,
3160            bracket
3161            or self._parse_bracket(
3162                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
3163            ),
3164        )
3165
3166        if only:
3167            this.set("only", only)
3168
3169        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
3170        self._match_text_seq("*")
3171
3172        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
3173        if parse_partition and self._match(TokenType.PARTITION, advance=False):
3174            this.set("partition", self._parse_partition())
3175
3176        if schema:
3177            return self._parse_schema(this=this)
3178
3179        version = self._parse_version()
3180
3181        if version:
3182            this.set("version", version)
3183
3184        if self.dialect.ALIAS_POST_TABLESAMPLE:
3185            table_sample = self._parse_table_sample()
3186
3187        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
3188        if alias:
3189            this.set("alias", alias)
3190
3191        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
3192            return self.expression(
3193                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
3194            )
3195
3196        this.set("hints", self._parse_table_hints())
3197
3198        if not this.args.get("pivots"):
3199            this.set("pivots", self._parse_pivots())
3200
3201        if not self.dialect.ALIAS_POST_TABLESAMPLE:
3202            table_sample = self._parse_table_sample()
3203
3204        if table_sample:
3205            table_sample.set("this", this)
3206            this = table_sample
3207
3208        if joins:
3209            for join in self._parse_joins():
3210                this.append("joins", join)
3211
3212        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
3213            this.set("ordinality", True)
3214            this.set("alias", self._parse_table_alias())
3215
3216        return this
3217
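    # e.g. (illustrative, T-SQL temporal tables): "FOR SYSTEM_TIME AS OF '2020-01-01'"
    # or "FOR SYSTEM_TIME BETWEEN x AND y" are captured as exp.Version nodes.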
3218    def _parse_version(self) -> t.Optional[exp.Version]:
3219        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
3220            this = "TIMESTAMP"
3221        elif self._match(TokenType.VERSION_SNAPSHOT):
3222            this = "VERSION"
3223        else:
3224            return None
3225
3226        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
3227            kind = self._prev.text.upper()
3228            start = self._parse_bitwise()
3229            self._match_texts(("TO", "AND"))
3230            end = self._parse_bitwise()
3231            expression: t.Optional[exp.Expression] = self.expression(
3232                exp.Tuple, expressions=[start, end]
3233            )
3234        elif self._match_text_seq("CONTAINED", "IN"):
3235            kind = "CONTAINED IN"
3236            expression = self.expression(
3237                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
3238            )
3239        elif self._match(TokenType.ALL):
3240            kind = "ALL"
3241            expression = None
3242        else:
3243            self._match_text_seq("AS", "OF")
3244            kind = "AS OF"
3245            expression = self._parse_type()
3246
3247        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
3248
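    # e.g. (illustrative, BigQuery): SELECT * FROM UNNEST([1, 2, 3]) AS x WITH OFFSET AS pos
    # For UNNEST_COLUMN_ONLY dialects the single alias names the column rather
    # than the derived table, which is why it is shifted into "columns" below.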
3249    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
3250        if not self._match(TokenType.UNNEST):
3251            return None
3252
3253        expressions = self._parse_wrapped_csv(self._parse_equality)
3254        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
3255
3256        alias = self._parse_table_alias() if with_alias else None
3257
3258        if alias:
3259            if self.dialect.UNNEST_COLUMN_ONLY:
3260                if alias.args.get("columns"):
3261                    self.raise_error("Unexpected extra column alias in unnest.")
3262
3263                alias.set("columns", [alias.this])
3264                alias.set("this", None)
3265
3266            columns = alias.args.get("columns") or []
3267            if offset and len(expressions) < len(columns):
3268                offset = columns.pop()
3269
3270        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
3271            self._match(TokenType.ALIAS)
3272            offset = self._parse_id_var(
3273                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
3274            ) or exp.to_identifier("offset")
3275
3276        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
3277
3278    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
3279        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
3280        if not is_derived and not self._match_text_seq("VALUES"):
3281            return None
3282
3283        expressions = self._parse_csv(self._parse_value)
3284        alias = self._parse_table_alias()
3285
3286        if is_derived:
3287            self._match_r_paren()
3288
3289        return self.expression(
3290            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
3291        )
3292
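    # e.g. (illustrative):
    #   SELECT * FROM t TABLESAMPLE BERNOULLI (10) REPEATABLE (42)   -- percent + seed
    #   SELECT * FROM t TABLESAMPLE (BUCKET 3 OUT OF 16 ON id)       -- Hive buckets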
3293    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
3294        if not self._match(TokenType.TABLE_SAMPLE) and not (
3295            as_modifier and self._match_text_seq("USING", "SAMPLE")
3296        ):
3297            return None
3298
3299        bucket_numerator = None
3300        bucket_denominator = None
3301        bucket_field = None
3302        percent = None
3303        size = None
3304        seed = None
3305
3306        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
3307        matched_l_paren = self._match(TokenType.L_PAREN)
3308
3309        if self.TABLESAMPLE_CSV:
3310            num = None
3311            expressions = self._parse_csv(self._parse_primary)
3312        else:
3313            expressions = None
3314            num = (
3315                self._parse_factor()
3316                if self._match(TokenType.NUMBER, advance=False)
3317                else self._parse_primary() or self._parse_placeholder()
3318            )
3319
3320        if self._match_text_seq("BUCKET"):
3321            bucket_numerator = self._parse_number()
3322            self._match_text_seq("OUT", "OF")
3323            bucket_denominator = self._parse_number()
3324            self._match(TokenType.ON)
3325            bucket_field = self._parse_field()
3326        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
3327            percent = num
3328        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
3329            size = num
3330        else:
3331            percent = num
3332
3333        if matched_l_paren:
3334            self._match_r_paren()
3335
3336        if self._match(TokenType.L_PAREN):
3337            method = self._parse_var(upper=True)
3338            seed = self._match(TokenType.COMMA) and self._parse_number()
3339            self._match_r_paren()
3340        elif self._match_texts(("SEED", "REPEATABLE")):
3341            seed = self._parse_wrapped(self._parse_number)
3342
3343        return self.expression(
3344            exp.TableSample,
3345            expressions=expressions,
3346            method=method,
3347            bucket_numerator=bucket_numerator,
3348            bucket_denominator=bucket_denominator,
3349            bucket_field=bucket_field,
3350            percent=percent,
3351            size=size,
3352            seed=seed,
3353        )
3354
3355    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
3356        return list(iter(self._parse_pivot, None)) or None
3357
3358    def _parse_joins(self) -> t.Iterator[exp.Join]:
3359        return iter(self._parse_join, None)
3360
3361    # https://duckdb.org/docs/sql/statements/pivot
3362    def _parse_simplified_pivot(self) -> exp.Pivot:
3363        def _parse_on() -> t.Optional[exp.Expression]:
3364            this = self._parse_bitwise()
3365            return self._parse_in(this) if self._match(TokenType.IN) else this
3366
3367        this = self._parse_table()
3368        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
3369        using = self._match(TokenType.USING) and self._parse_csv(
3370            lambda: self._parse_alias(self._parse_function())
3371        )
3372        group = self._parse_group()
3373        return self.expression(
3374            exp.Pivot, this=this, expressions=expressions, using=using, group=group
3375        )
3376
3377    def _parse_pivot_in(self) -> exp.In:
3378        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
3379            this = self._parse_conjunction()
3380
3381            self._match(TokenType.ALIAS)
3382            alias = self._parse_field()
3383            if alias:
3384                return self.expression(exp.PivotAlias, this=this, alias=alias)
3385
3386            return this
3387
3388        value = self._parse_column()
3389
3390        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
3391            self.raise_error("Expecting IN (")
3392
3393        aliased_expressions = self._parse_csv(_parse_aliased_expression)
3394
3395        self._match_r_paren()
3396        return self.expression(exp.In, this=value, expressions=aliased_expressions)
3397
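    # e.g. (illustrative, Snowflake-style):
    #   SELECT * FROM sales PIVOT (SUM(amount) FOR month IN ('JAN' AS jan, 'FEB')) AS p
    # UNPIVOT takes a plain column list in place of the aggregation list.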
3398    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
3399        index = self._index
3400        include_nulls = None
3401
3402        if self._match(TokenType.PIVOT):
3403            unpivot = False
3404        elif self._match(TokenType.UNPIVOT):
3405            unpivot = True
3406
3407            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
3408            if self._match_text_seq("INCLUDE", "NULLS"):
3409                include_nulls = True
3410            elif self._match_text_seq("EXCLUDE", "NULLS"):
3411                include_nulls = False
3412        else:
3413            return None
3414
3415        expressions = []
3416
3417        if not self._match(TokenType.L_PAREN):
3418            self._retreat(index)
3419            return None
3420
3421        if unpivot:
3422            expressions = self._parse_csv(self._parse_column)
3423        else:
3424            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))
3425
3426        if not expressions:
3427            self.raise_error("Failed to parse PIVOT's aggregation list")
3428
3429        if not self._match(TokenType.FOR):
3430            self.raise_error("Expecting FOR")
3431
3432        field = self._parse_pivot_in()
3433
3434        self._match_r_paren()
3435
3436        pivot = self.expression(
3437            exp.Pivot,
3438            expressions=expressions,
3439            field=field,
3440            unpivot=unpivot,
3441            include_nulls=include_nulls,
3442        )
3443
3444        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
3445            pivot.set("alias", self._parse_table_alias())
3446
3447        if not unpivot:
3448            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))
3449
3450            columns: t.List[exp.Expression] = []
3451            for fld in pivot.args["field"].expressions:
3452                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
3453                for name in names:
3454                    if self.PREFIXED_PIVOT_COLUMNS:
3455                        name = f"{name}_{field_name}" if name else field_name
3456                    else:
3457                        name = f"{field_name}_{name}" if name else field_name
3458
3459                    columns.append(exp.to_identifier(name))
3460
3461            pivot.set("columns", columns)
3462
3463        return pivot
3464
3465    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
3466        return [agg.alias for agg in aggregations]
3467
3468    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
3469        if not skip_where_token and not self._match(TokenType.PREWHERE):
3470            return None
3471
3472        return self.expression(
3473            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
3474        )
3475
3476    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
3477        if not skip_where_token and not self._match(TokenType.WHERE):
3478            return None
3479
3480        return self.expression(
3481            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
3482        )
3483
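    # e.g. (illustrative) mixed grouping elements accepted by the loop below:
    #   GROUP BY a, GROUPING SETS ((a, b), ()), CUBE (c)
    #   GROUP BY a, b WITH ROLLUP    -- MySQL-style trailing WITH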
3484    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
3485        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
3486            return None
3487
3488        elements: t.Dict[str, t.Any] = defaultdict(list)
3489
3490        if self._match(TokenType.ALL):
3491            elements["all"] = True
3492        elif self._match(TokenType.DISTINCT):
3493            elements["all"] = False
3494
3495        while True:
3496            expressions = self._parse_csv(self._parse_conjunction)
3497            if expressions:
3498                elements["expressions"].extend(expressions)
3499
3500            grouping_sets = self._parse_grouping_sets()
3501            if grouping_sets:
3502                elements["grouping_sets"].extend(grouping_sets)
3503
3504            rollup = None
3505            cube = None
3506            totals = None
3507
3508            index = self._index
3509            with_ = self._match(TokenType.WITH)
3510            if self._match(TokenType.ROLLUP):
3511                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
3512                elements["rollup"].extend(ensure_list(rollup))
3513
3514            if self._match(TokenType.CUBE):
3515                cube = with_ or self._parse_wrapped_csv(self._parse_column)
3516                elements["cube"].extend(ensure_list(cube))
3517
3518            if self._match_text_seq("TOTALS"):
3519                totals = True
3520                elements["totals"] = True  # type: ignore
3521
3522            if not (grouping_sets or rollup or cube or totals):
3523                if with_:
3524                    self._retreat(index)
3525                break
3526
3527        return self.expression(exp.Group, **elements)  # type: ignore
3528
3529    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
3530        if not self._match(TokenType.GROUPING_SETS):
3531            return None
3532
3533        return self._parse_wrapped_csv(self._parse_grouping_set)
3534
3535    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
3536        if self._match(TokenType.L_PAREN):
3537            grouping_set = self._parse_csv(self._parse_column)
3538            self._match_r_paren()
3539            return self.expression(exp.Tuple, expressions=grouping_set)
3540
3541        return self._parse_column()
3542
3543    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
3544        if not skip_having_token and not self._match(TokenType.HAVING):
3545            return None
3546        return self.expression(exp.Having, this=self._parse_conjunction())
3547
3548    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
3549        if not self._match(TokenType.QUALIFY):
3550            return None
3551        return self.expression(exp.Qualify, this=self._parse_conjunction())
3552
3553    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
3554        if skip_start_token:
3555            start = None
3556        elif self._match(TokenType.START_WITH):
3557            start = self._parse_conjunction()
3558        else:
3559            return None
3560
3561        self._match(TokenType.CONNECT_BY)
3562        nocycle = self._match_text_seq("NOCYCLE")
3563        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
3564            exp.Prior, this=self._parse_bitwise()
3565        )
3566        connect = self._parse_conjunction()
3567        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")  # PRIOR is only valid within CONNECT BY
3568
3569        if not start and self._match(TokenType.START_WITH):
3570            start = self._parse_conjunction()
3571
3572        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)
3573
3574    def _parse_name_as_expression(self) -> exp.Alias:
3575        return self.expression(
3576            exp.Alias,
3577            alias=self._parse_id_var(any_token=True),
3578            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
3579        )
3580
3581    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
3582        if self._match_text_seq("INTERPOLATE"):
3583            return self._parse_wrapped_csv(self._parse_name_as_expression)
3584        return None
3585
3586    def _parse_order(
3587        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
3588    ) -> t.Optional[exp.Expression]:
3589        siblings = None
3590        if not skip_order_token and not self._match(TokenType.ORDER_BY):
3591            if not self._match(TokenType.ORDER_SIBLINGS_BY):
3592                return this
3593
3594            siblings = True
3595
3596        return self.expression(
3597            exp.Order,
3598            this=this,
3599            expressions=self._parse_csv(self._parse_ordered),
3600            interpolate=self._parse_interpolate(),
3601            siblings=siblings,
3602        )
3603
3604    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
3605        if not self._match(token):
3606            return None
3607        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
3608
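    # e.g. (illustrative): "ORDER BY d DESC NULLS LAST WITH FILL FROM 1 TO 10 STEP 2"
    # (the WITH FILL tail is ClickHouse syntax).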
3609    def _parse_ordered(
3610        self, parse_method: t.Optional[t.Callable] = None
3611    ) -> t.Optional[exp.Ordered]:
3612        this = parse_method() if parse_method else self._parse_conjunction()
3613        if not this:
3614            return None
3615
3616        asc = self._match(TokenType.ASC)
3617        desc = self._match(TokenType.DESC) or (asc and False)  # True: DESC; False: explicit ASC; None: unspecified
3618
3619        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
3620        is_nulls_last = self._match_text_seq("NULLS", "LAST")
3621
3622        nulls_first = is_nulls_first or False
3623        explicitly_null_ordered = is_nulls_first or is_nulls_last
3624
3625        if (
3626            not explicitly_null_ordered
3627            and (
3628                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
3629                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
3630            )
3631            and self.dialect.NULL_ORDERING != "nulls_are_last"
3632        ):
3633            nulls_first = True
3634
3635        if self._match_text_seq("WITH", "FILL"):
3636            with_fill = self.expression(
3637                exp.WithFill,
3638                **{  # type: ignore
3639                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
3640                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
3641                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
3642                },
3643            )
3644        else:
3645            with_fill = None
3646
3647        return self.expression(
3648            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
3649        )
3650
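    # e.g. (illustrative):
    #   LIMIT 10                  -> exp.Limit
    #   LIMIT 5, 10               -> exp.Limit with a comma-style offset
    #   TOP (10)                  -> exp.Limit, parsed with top=True
    #   FETCH FIRST 10 ROWS ONLY  -> exp.Fetch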
3651    def _parse_limit(
3652        self,
3653        this: t.Optional[exp.Expression] = None,
3654        top: bool = False,
3655        skip_limit_token: bool = False,
3656    ) -> t.Optional[exp.Expression]:
3657        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
3658            comments = self._prev_comments
3659            if top:
3660                limit_paren = self._match(TokenType.L_PAREN)
3661                expression = self._parse_term() if limit_paren else self._parse_number()
3662
3663                if limit_paren:
3664                    self._match_r_paren()
3665            else:
3666                expression = self._parse_term()
3667
3668            if self._match(TokenType.COMMA):
3669                offset = expression
3670                expression = self._parse_term()
3671            else:
3672                offset = None
3673
3674            limit_exp = self.expression(
3675                exp.Limit,
3676                this=this,
3677                expression=expression,
3678                offset=offset,
3679                comments=comments,
3680                expressions=self._parse_limit_by(),
3681            )
3682
3683            return limit_exp
3684
3685        if self._match(TokenType.FETCH):
3686            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
3687            direction = self._prev.text.upper() if direction else "FIRST"
3688
3689            count = self._parse_field(tokens=self.FETCH_TOKENS)
3690            percent = self._match(TokenType.PERCENT)
3691
3692            self._match_set((TokenType.ROW, TokenType.ROWS))
3693
3694            only = self._match_text_seq("ONLY")
3695            with_ties = self._match_text_seq("WITH", "TIES")
3696
3697            if only and with_ties:
3698                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")
3699
3700            return self.expression(
3701                exp.Fetch,
3702                direction=direction,
3703                count=count,
3704                percent=percent,
3705                with_ties=with_ties,
3706            )
3707
3708        return this
3709
3710    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
3711        if not self._match(TokenType.OFFSET):
3712            return this
3713
3714        count = self._parse_term()
3715        self._match_set((TokenType.ROW, TokenType.ROWS))
3716
3717        return self.expression(
3718            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
3719        )
3720
3721    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
3722        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)
3723
3724    def _parse_locks(self) -> t.List[exp.Lock]:
3725        locks = []
3726        while True:
3727            if self._match_text_seq("FOR", "UPDATE"):
3728                update = True
3729            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
3730                "LOCK", "IN", "SHARE", "MODE"
3731            ):
3732                update = False
3733            else:
3734                break
3735
3736            expressions = None
3737            if self._match_text_seq("OF"):
3738                expressions = self._parse_csv(lambda: self._parse_table(schema=True))
3739
3740            wait: t.Optional[bool | exp.Expression] = None
3741            if self._match_text_seq("NOWAIT"):
3742                wait = True
3743            elif self._match_text_seq("WAIT"):
3744                wait = self._parse_primary()
3745            elif self._match_text_seq("SKIP", "LOCKED"):
3746                wait = False
3747
3748            locks.append(
3749                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
3750            )
3751
3752        return locks
3753
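    # e.g. (illustrative, DuckDB): "SELECT ... UNION ALL BY NAME SELECT ..." sets
    # distinct=False and by_name=True on the resulting exp.Union.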
3754    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3755        while this and self._match_set(self.SET_OPERATIONS):
3756            token_type = self._prev.token_type
3757
3758            if token_type == TokenType.UNION:
3759                operation = exp.Union
3760            elif token_type == TokenType.EXCEPT:
3761                operation = exp.Except
3762            else:
3763                operation = exp.Intersect
3764
3765            comments = self._prev.comments
3766            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
3767            by_name = self._match_text_seq("BY", "NAME")
3768            expression = self._parse_select(nested=True, parse_set_operation=False)
3769
3770            this = self.expression(
3771                operation,
3772                comments=comments,
3773                this=this,
3774                distinct=distinct,
3775                by_name=by_name,
3776                expression=expression,
3777            )
3778
3779        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
3780            expression = this.expression
3781
3782            if expression:
3783                for arg in self.UNION_MODIFIERS:
3784                    expr = expression.args.get(arg)
3785                    if expr:
3786                        this.set(arg, expr.pop())
3787
3788        return this
3789
3790    def _parse_expression(self) -> t.Optional[exp.Expression]:
3791        return self._parse_alias(self._parse_conjunction())
3792
3793    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
3794        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
3795
3796    def _parse_equality(self) -> t.Optional[exp.Expression]:
3797        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
3798
3799    def _parse_comparison(self) -> t.Optional[exp.Expression]:
3800        return self._parse_tokens(self._parse_range, self.COMPARISON)
3801
3802    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
3803        this = this or self._parse_bitwise()
3804        negate = self._match(TokenType.NOT)
3805
3806        if self._match_set(self.RANGE_PARSERS):
3807            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
3808            if not expression:
3809                return this
3810
3811            this = expression
3812        elif self._match(TokenType.ISNULL):
3813            this = self.expression(exp.Is, this=this, expression=exp.Null())
3814
3815        # Postgres supports ISNULL and NOTNULL for conditions.
3816        # https://blog.andreiavram.ro/postgresql-null-composite-type/
3817        if self._match(TokenType.NOTNULL):
3818            this = self.expression(exp.Is, this=this, expression=exp.Null())
3819            this = self.expression(exp.Not, this=this)
3820
3821        if negate:
3822            this = self.expression(exp.Not, this=this)
3823
3824        if self._match(TokenType.IS):
3825            this = self._parse_is(this)
3826
3827        return this
3828
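    # e.g. (illustrative): "x IS NOT DISTINCT FROM y" becomes exp.NullSafeEQ,
    # while "x IS NOT NULL" becomes exp.Not wrapping exp.Is.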
3829    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3830        index = self._index - 1
3831        negate = self._match(TokenType.NOT)
3832
3833        if self._match_text_seq("DISTINCT", "FROM"):
3834            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
3835            return self.expression(klass, this=this, expression=self._parse_bitwise())
3836
3837        expression = self._parse_null() or self._parse_boolean()
3838        if not expression:
3839            self._retreat(index)
3840            return None
3841
3842        this = self.expression(exp.Is, this=this, expression=expression)
3843        return self.expression(exp.Not, this=this) if negate else this
3844
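    # e.g. (illustrative): "x IN (1, 2)" fills "expressions", "x IN (SELECT y FROM t)"
    # fills "query" (the lone query is wrapped as a subquery), and
    # "x IN UNNEST(arr)" (BigQuery) fills "unnest".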
3845    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
3846        unnest = self._parse_unnest(with_alias=False)
3847        if unnest:
3848            this = self.expression(exp.In, this=this, unnest=unnest)
3849        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
3850            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
3851            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))
3852
3853            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
3854                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
3855            else:
3856                this = self.expression(exp.In, this=this, expressions=expressions)
3857
3858            if matched_l_paren:
3859                self._match_r_paren(this)
3860            elif not self._match(TokenType.R_BRACKET, expression=this):
3861                self.raise_error("Expecting ]")
3862        else:
3863            this = self.expression(exp.In, this=this, field=self._parse_field())
3864
3865        return this
3866
3867    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
3868        low = self._parse_bitwise()
3869        self._match(TokenType.AND)
3870        high = self._parse_bitwise()
3871        return self.expression(exp.Between, this=this, low=low, high=high)
3872
3873    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3874        if not self._match(TokenType.ESCAPE):
3875            return this
3876        return self.expression(exp.Escape, this=this, expression=self._parse_string())
3877
3878    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
3879        index = self._index
3880
3881        if not self._match(TokenType.INTERVAL) and match_interval:
3882            return None
3883
3884        if self._match