
sqlglot.dialects.dialect

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from enum import Enum, auto
   6from functools import reduce
   7
   8from sqlglot import exp
   9from sqlglot.errors import ParseError
  10from sqlglot.generator import Generator, unsupported_args
  11from sqlglot.helper import AutoName, flatten, is_int, seq_get, subclasses, to_bool
  12from sqlglot.jsonpath import JSONPathTokenizer, parse as parse_json_path
  13from sqlglot.parser import Parser
  14from sqlglot.time import TIMEZONES, format_time, subsecond_precision
  15from sqlglot.tokens import Token, Tokenizer, TokenType
  16from sqlglot.trie import new_trie
  17
  18DATE_ADD_OR_DIFF = t.Union[
  19    exp.DateAdd,
  20    exp.DateDiff,
  21    exp.DateSub,
  22    exp.TsOrDsAdd,
  23    exp.TsOrDsDiff,
  24]
  25DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateSub]
  26JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar]
  27
  28
  29if t.TYPE_CHECKING:
  30    from sqlglot._typing import B, E, F
  31
  32    from sqlglot.optimizer.annotate_types import TypeAnnotator
  33
  34    AnnotatorsType = t.Dict[t.Type[E], t.Callable[[TypeAnnotator, E], E]]
  35
  36logger = logging.getLogger("sqlglot")
  37
  38UNESCAPED_SEQUENCES = {
  39    "\\a": "\a",
  40    "\\b": "\b",
  41    "\\f": "\f",
  42    "\\n": "\n",
  43    "\\r": "\r",
  44    "\\t": "\t",
  45    "\\v": "\v",
  46    "\\\\": "\\",
  47}
  48
  49
  50def _annotate_with_type_lambda(data_type: exp.DataType.Type) -> t.Callable[[TypeAnnotator, E], E]:
  51    return lambda self, e: self._annotate_with_type(e, data_type)
  52
  53
  54class Dialects(str, Enum):
  55    """Dialects supported by SQLGLot."""
  56
  57    DIALECT = ""
  58
  59    ATHENA = "athena"
  60    BIGQUERY = "bigquery"
  61    CLICKHOUSE = "clickhouse"
  62    DATABRICKS = "databricks"
  63    DORIS = "doris"
  64    DRILL = "drill"
  65    DUCKDB = "duckdb"
  66    HIVE = "hive"
  67    MATERIALIZE = "materialize"
  68    MYSQL = "mysql"
  69    ORACLE = "oracle"
  70    POSTGRES = "postgres"
  71    PRESTO = "presto"
  72    PRQL = "prql"
  73    REDSHIFT = "redshift"
  74    RISINGWAVE = "risingwave"
  75    SNOWFLAKE = "snowflake"
  76    SPARK = "spark"
  77    SPARK2 = "spark2"
  78    SQLITE = "sqlite"
  79    STARROCKS = "starrocks"
  80    TABLEAU = "tableau"
  81    TERADATA = "teradata"
  82    TRINO = "trino"
  83    TSQL = "tsql"
  84
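# A minimal usage sketch (assuming sqlglot is importable): since Dialects
# subclasses str, its members can be passed anywhere a dialect name is expected.
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT 1", read=Dialects.DUCKDB, write=Dialects.TSQL)
#     ['SELECT 1']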
  85
  86class NormalizationStrategy(str, AutoName):
  87    """Specifies the strategy according to which identifiers should be normalized."""
  88
  89    LOWERCASE = auto()
  90    """Unquoted identifiers are lowercased."""
  91
  92    UPPERCASE = auto()
  93    """Unquoted identifiers are uppercased."""
  94
  95    CASE_SENSITIVE = auto()
  96    """Always case-sensitive, regardless of quotes."""
  97
  98    CASE_INSENSITIVE = auto()
  99    """Always case-insensitive, regardless of quotes."""
 100
 101
 102class _Dialect(type):
 103    classes: t.Dict[str, t.Type[Dialect]] = {}
 104
 105    def __eq__(cls, other: t.Any) -> bool:
 106        if cls is other:
 107            return True
 108        if isinstance(other, str):
 109            return cls is cls.get(other)
 110        if isinstance(other, Dialect):
 111            return cls is type(other)
 112
 113        return False
 114
 115    def __hash__(cls) -> int:
 116        return hash(cls.__name__.lower())
 117
 118    @classmethod
 119    def __getitem__(cls, key: str) -> t.Type[Dialect]:
 120        return cls.classes[key]
 121
 122    @classmethod
 123    def get(
 124        cls, key: str, default: t.Optional[t.Type[Dialect]] = None
 125    ) -> t.Optional[t.Type[Dialect]]:
 126        return cls.classes.get(key, default)
 127
 128    def __new__(cls, clsname, bases, attrs):
 129        klass = super().__new__(cls, clsname, bases, attrs)
 130        enum = Dialects.__members__.get(clsname.upper())
 131        cls.classes[enum.value if enum is not None else clsname.lower()] = klass
 132
 133        klass.TIME_TRIE = new_trie(klass.TIME_MAPPING)
 134        klass.FORMAT_TRIE = (
 135            new_trie(klass.FORMAT_MAPPING) if klass.FORMAT_MAPPING else klass.TIME_TRIE
 136        )
 137        klass.INVERSE_TIME_MAPPING = {v: k for k, v in klass.TIME_MAPPING.items()}
 138        klass.INVERSE_TIME_TRIE = new_trie(klass.INVERSE_TIME_MAPPING)
 139        klass.INVERSE_FORMAT_MAPPING = {v: k for k, v in klass.FORMAT_MAPPING.items()}
 140        klass.INVERSE_FORMAT_TRIE = new_trie(klass.INVERSE_FORMAT_MAPPING)
 141
 142        klass.INVERSE_CREATABLE_KIND_MAPPING = {
 143            v: k for k, v in klass.CREATABLE_KIND_MAPPING.items()
 144        }
 145
 146        base = seq_get(bases, 0)
 147        base_tokenizer = (getattr(base, "tokenizer_class", Tokenizer),)
 148        base_jsonpath_tokenizer = (getattr(base, "jsonpath_tokenizer_class", JSONPathTokenizer),)
 149        base_parser = (getattr(base, "parser_class", Parser),)
 150        base_generator = (getattr(base, "generator_class", Generator),)
 151
 152        klass.tokenizer_class = klass.__dict__.get(
 153            "Tokenizer", type("Tokenizer", base_tokenizer, {})
 154        )
 155        klass.jsonpath_tokenizer_class = klass.__dict__.get(
 156            "JSONPathTokenizer", type("JSONPathTokenizer", base_jsonpath_tokenizer, {})
 157        )
 158        klass.parser_class = klass.__dict__.get("Parser", type("Parser", base_parser, {}))
 159        klass.generator_class = klass.__dict__.get(
 160            "Generator", type("Generator", base_generator, {})
 161        )
 162
 163        klass.QUOTE_START, klass.QUOTE_END = list(klass.tokenizer_class._QUOTES.items())[0]
 164        klass.IDENTIFIER_START, klass.IDENTIFIER_END = list(
 165            klass.tokenizer_class._IDENTIFIERS.items()
 166        )[0]
 167
 168        def get_start_end(token_type: TokenType) -> t.Tuple[t.Optional[str], t.Optional[str]]:
 169            return next(
 170                (
 171                    (s, e)
 172                    for s, (e, t) in klass.tokenizer_class._FORMAT_STRINGS.items()
 173                    if t == token_type
 174                ),
 175                (None, None),
 176            )
 177
 178        klass.BIT_START, klass.BIT_END = get_start_end(TokenType.BIT_STRING)
 179        klass.HEX_START, klass.HEX_END = get_start_end(TokenType.HEX_STRING)
 180        klass.BYTE_START, klass.BYTE_END = get_start_end(TokenType.BYTE_STRING)
 181        klass.UNICODE_START, klass.UNICODE_END = get_start_end(TokenType.UNICODE_STRING)
 182
 183        if "\\" in klass.tokenizer_class.STRING_ESCAPES:
 184            klass.UNESCAPED_SEQUENCES = {
 185                **UNESCAPED_SEQUENCES,
 186                **klass.UNESCAPED_SEQUENCES,
 187            }
 188
 189        klass.ESCAPED_SEQUENCES = {v: k for k, v in klass.UNESCAPED_SEQUENCES.items()}
 190
 191        klass.SUPPORTS_COLUMN_JOIN_MARKS = "(+)" in klass.tokenizer_class.KEYWORDS
 192
 193        if enum not in ("", "bigquery"):
 194            klass.generator_class.SELECT_KINDS = ()
 195
 196        if enum not in ("", "athena", "presto", "trino"):
 197            klass.generator_class.TRY_SUPPORTED = False
 198            klass.generator_class.SUPPORTS_UESCAPE = False
 199
 200        if enum not in ("", "databricks", "hive", "spark", "spark2"):
 201            modifier_transforms = klass.generator_class.AFTER_HAVING_MODIFIER_TRANSFORMS.copy()
 202            for modifier in ("cluster", "distribute", "sort"):
 203                modifier_transforms.pop(modifier, None)
 204
 205            klass.generator_class.AFTER_HAVING_MODIFIER_TRANSFORMS = modifier_transforms
 206
 207        if enum not in ("", "doris", "mysql"):
 208            klass.parser_class.ID_VAR_TOKENS = klass.parser_class.ID_VAR_TOKENS | {
 209                TokenType.STRAIGHT_JOIN,
 210            }
 211            klass.parser_class.TABLE_ALIAS_TOKENS = klass.parser_class.TABLE_ALIAS_TOKENS | {
 212                TokenType.STRAIGHT_JOIN,
 213            }
 214
 215        if not klass.SUPPORTS_SEMI_ANTI_JOIN:
 216            klass.parser_class.TABLE_ALIAS_TOKENS = klass.parser_class.TABLE_ALIAS_TOKENS | {
 217                TokenType.ANTI,
 218                TokenType.SEMI,
 219            }
 220
 221        return klass
 222
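# A sketch of the registration behavior: creating a Dialect subclass is enough to
# make it addressable by its lowercased class name (MyDialect is hypothetical).
#
#     >>> class MyDialect(Dialect):
#     ...     NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
#     >>> type(Dialect.get_or_raise("mydialect")) is MyDialect
#     True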
 223
 224class Dialect(metaclass=_Dialect):
 225    INDEX_OFFSET = 0
 226    """The base index offset for arrays."""
 227
 228    WEEK_OFFSET = 0
 229    """First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday."""
 230
 231    UNNEST_COLUMN_ONLY = False
 232    """Whether `UNNEST` table aliases are treated as column aliases."""
 233
 234    ALIAS_POST_TABLESAMPLE = False
 235    """Whether the table alias comes after tablesample."""
 236
 237    TABLESAMPLE_SIZE_IS_PERCENT = False
 238    """Whether a size in the table sample clause represents percentage."""
 239
 240    NORMALIZATION_STRATEGY = NormalizationStrategy.LOWERCASE
 241    """Specifies the strategy according to which identifiers should be normalized."""
 242
 243    IDENTIFIERS_CAN_START_WITH_DIGIT = False
 244    """Whether an unquoted identifier can start with a digit."""
 245
 246    DPIPE_IS_STRING_CONCAT = True
 247    """Whether the DPIPE token (`||`) is a string concatenation operator."""
 248
 249    STRICT_STRING_CONCAT = False
 250    """Whether `CONCAT`'s arguments must be strings."""
 251
 252    SUPPORTS_USER_DEFINED_TYPES = True
 253    """Whether user-defined data types are supported."""
 254
 255    SUPPORTS_SEMI_ANTI_JOIN = True
 256    """Whether `SEMI` or `ANTI` joins are supported."""
 257
 258    SUPPORTS_COLUMN_JOIN_MARKS = False
 259    """Whether the old-style outer join (+) syntax is supported."""
 260
 261    COPY_PARAMS_ARE_CSV = True
 262    """Separator of COPY statement parameters."""
 263
 264    NORMALIZE_FUNCTIONS: bool | str = "upper"
 265    """
 266    Determines how function names are going to be normalized.
 267    Possible values:
 268        "upper" or True: Convert names to uppercase.
 269        "lower": Convert names to lowercase.
 270        False: Disables function name normalization.
 271    """
 272
 273    PRESERVE_ORIGINAL_NAMES: bool = False
 274    """
 275    Whether the name of the function should be preserved inside the node's metadata.
 276    This can be useful for roundtripping deprecated vs. new functions that share an
 277    AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
 278    """
 279
 280    LOG_BASE_FIRST: t.Optional[bool] = True
 281    """
 282    Whether the base comes first in the `LOG` function.
 283    Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`)
 284    """
 285
 286    NULL_ORDERING = "nulls_are_small"
 287    """
 288    Default `NULL` ordering method to use if not explicitly set.
 289    Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"`
 290    """
 291
 292    TYPED_DIVISION = False
 293    """
 294    Whether the behavior of `a / b` depends on the types of `a` and `b`.
 295    False means `a / b` is always float division.
 296    True means `a / b` is integer division if both `a` and `b` are integers.
 297    """
 298
 299    SAFE_DIVISION = False
 300    """Whether division by zero throws an error (`False`) or returns NULL (`True`)."""
 301
 302    CONCAT_COALESCE = False
 303    """A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string."""
 304
 305    HEX_LOWERCASE = False
 306    """Whether the `HEX` function returns a lowercase hexadecimal string."""
 307
 308    DATE_FORMAT = "'%Y-%m-%d'"
 309    DATEINT_FORMAT = "'%Y%m%d'"
 310    TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'"
 311
 312    TIME_MAPPING: t.Dict[str, str] = {}
 313    """Associates this dialect's time formats with their equivalent Python `strftime` formats."""
 314
 315    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_model_rules_date_time
 316    # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Data-Type-Conversions/Character-to-DATE-Conversion/Forcing-a-FORMAT-on-CAST-for-Converting-Character-to-DATE
 317    FORMAT_MAPPING: t.Dict[str, str] = {}
 318    """
 319    Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`.
 320    If empty, the corresponding trie will be constructed off of `TIME_MAPPING`.
 321    """
 322
 323    UNESCAPED_SEQUENCES: t.Dict[str, str] = {}
 324    """Mapping of an escaped sequence (`\\n`) to its unescaped version (`\n`)."""
 325
 326    PSEUDOCOLUMNS: t.Set[str] = set()
 327    """
 328    Columns that are auto-generated by the engine corresponding to this dialect.
 329    For example, such columns may be excluded from `SELECT *` queries.
 330    """
 331
 332    PREFER_CTE_ALIAS_COLUMN = False
 333    """
 334    Some dialects, such as Snowflake, allow you to reference a CTE column alias in the
 335    HAVING clause of the CTE. This flag will cause the CTE alias columns to override
 336    any projection aliases in the subquery.
 337
 338    For example,
 339        WITH y(c) AS (
 340            SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
 341        ) SELECT c FROM y;
 342
 343        will be rewritten as
 344
 345        WITH y(c) AS (
 346            SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
 347        ) SELECT c FROM y;
 348    """
 349
 354
 355    FORCE_EARLY_ALIAS_REF_EXPANSION = False
 356    """
 357    Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
 358
 359    For example:
 360        WITH data AS (
 361        SELECT
 362            1 AS id,
 363            2 AS my_id
 364        )
 365        SELECT
 366            id AS my_id
 367        FROM
 368            data
 369        WHERE
 370            my_id = 1
 371        GROUP BY
 372        my_id
 373        HAVING
 374            my_id = 1
 375
 376    In most dialects, "my_id" would refer to "data.my_id" across the query, except:
 377        - BigQuery, which will forward the alias to GROUP BY + HAVING clauses, i.e.
 378          it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
 379        - Clickhouse, which will forward the alias across the query, i.e. it resolves
 380          to "WHERE id = 1 GROUP BY id HAVING id = 1"
 381    """
 382
 383    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = False
 384    """Whether alias reference expansion before qualification should only happen for the GROUP BY clause."""
 385
 386    SUPPORTS_ORDER_BY_ALL = False
 387    """
 388    Whether ORDER BY ALL is supported (expands to all the selected columns), as in DuckDB and Spark3/Databricks.
 389    """
 390
 391    HAS_DISTINCT_ARRAY_CONSTRUCTORS = False
 392    """
 393    Whether the ARRAY constructor is context-sensitive, i.e. in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3),
 394    as the former is of type INT[] while the latter is of type SUPER.
 395    """
 396
 397    SUPPORTS_FIXED_SIZE_ARRAYS = False
 398    """
 399    Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts,
 400    e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this
 401    should be interpreted as a subscript/index operator.
 402    """
 403
 404    STRICT_JSON_PATH_SYNTAX = True
 405    """Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning."""
 406
 407    ON_CONDITION_EMPTY_BEFORE_ERROR = True
 408    """Whether "X ON EMPTY" should come before "X ON ERROR" (for dialects like T-SQL, MySQL, Oracle)."""
 409
 410    ARRAY_AGG_INCLUDES_NULLS: t.Optional[bool] = True
 411    """Whether ArrayAgg needs to filter NULL values."""
 412
 413    PROMOTE_TO_INFERRED_DATETIME_TYPE = False
 414    """
 415    This flag is used in the optimizer's canonicalize rule and determines whether x will be promoted
 416    to the literal's type in x::DATE < '2020-01-01 12:05:03' (i.e., DATETIME). When false, the literal
 417    is cast to x's type to match it instead.
 418    """
 419
 420    SUPPORTS_VALUES_DEFAULT = True
 421    """Whether the DEFAULT keyword is supported in the VALUES clause."""
 422
 423    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = False
 424    """Whether number literals can include underscores for better readability"""
 425
 426    REGEXP_EXTRACT_DEFAULT_GROUP = 0
 427    """The default value for the capturing group."""
 428
 429    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
 430        exp.Except: True,
 431        exp.Intersect: True,
 432        exp.Union: True,
 433    }
 434    """
 435    Whether a set operation uses DISTINCT by default. This is `None` when either `DISTINCT` or `ALL`
 436    must be explicitly specified.
 437    """
 438
 439    CREATABLE_KIND_MAPPING: dict[str, str] = {}
 440    """
 441    Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse
 442    equivalent of CREATE SCHEMA is CREATE DATABASE.
 443    """
 444
 445    # --- Autofilled ---
 446
 447    tokenizer_class = Tokenizer
 448    jsonpath_tokenizer_class = JSONPathTokenizer
 449    parser_class = Parser
 450    generator_class = Generator
 451
 452    # A trie of the time_mapping keys
 453    TIME_TRIE: t.Dict = {}
 454    FORMAT_TRIE: t.Dict = {}
 455
 456    INVERSE_TIME_MAPPING: t.Dict[str, str] = {}
 457    INVERSE_TIME_TRIE: t.Dict = {}
 458    INVERSE_FORMAT_MAPPING: t.Dict[str, str] = {}
 459    INVERSE_FORMAT_TRIE: t.Dict = {}
 460
 461    INVERSE_CREATABLE_KIND_MAPPING: dict[str, str] = {}
 462
 463    ESCAPED_SEQUENCES: t.Dict[str, str] = {}
 464
 465    # Delimiters for string literals and identifiers
 466    QUOTE_START = "'"
 467    QUOTE_END = "'"
 468    IDENTIFIER_START = '"'
 469    IDENTIFIER_END = '"'
 470
 471    # Delimiters for bit, hex, byte and unicode literals
 472    BIT_START: t.Optional[str] = None
 473    BIT_END: t.Optional[str] = None
 474    HEX_START: t.Optional[str] = None
 475    HEX_END: t.Optional[str] = None
 476    BYTE_START: t.Optional[str] = None
 477    BYTE_END: t.Optional[str] = None
 478    UNICODE_START: t.Optional[str] = None
 479    UNICODE_END: t.Optional[str] = None
 480
 481    DATE_PART_MAPPING = {
 482        "Y": "YEAR",
 483        "YY": "YEAR",
 484        "YYY": "YEAR",
 485        "YYYY": "YEAR",
 486        "YR": "YEAR",
 487        "YEARS": "YEAR",
 488        "YRS": "YEAR",
 489        "MM": "MONTH",
 490        "MON": "MONTH",
 491        "MONS": "MONTH",
 492        "MONTHS": "MONTH",
 493        "D": "DAY",
 494        "DD": "DAY",
 495        "DAYS": "DAY",
 496        "DAYOFMONTH": "DAY",
 497        "DAY OF WEEK": "DAYOFWEEK",
 498        "WEEKDAY": "DAYOFWEEK",
 499        "DOW": "DAYOFWEEK",
 500        "DW": "DAYOFWEEK",
 501        "WEEKDAY_ISO": "DAYOFWEEKISO",
 502        "DOW_ISO": "DAYOFWEEKISO",
 503        "DW_ISO": "DAYOFWEEKISO",
 504        "DAY OF YEAR": "DAYOFYEAR",
 505        "DOY": "DAYOFYEAR",
 506        "DY": "DAYOFYEAR",
 507        "W": "WEEK",
 508        "WK": "WEEK",
 509        "WEEKOFYEAR": "WEEK",
 510        "WOY": "WEEK",
 511        "WY": "WEEK",
 512        "WEEK_ISO": "WEEKISO",
 513        "WEEKOFYEARISO": "WEEKISO",
 514        "WEEKOFYEAR_ISO": "WEEKISO",
 515        "Q": "QUARTER",
 516        "QTR": "QUARTER",
 517        "QTRS": "QUARTER",
 518        "QUARTERS": "QUARTER",
 519        "H": "HOUR",
 520        "HH": "HOUR",
 521        "HR": "HOUR",
 522        "HOURS": "HOUR",
 523        "HRS": "HOUR",
 524        "M": "MINUTE",
 525        "MI": "MINUTE",
 526        "MIN": "MINUTE",
 527        "MINUTES": "MINUTE",
 528        "MINS": "MINUTE",
 529        "S": "SECOND",
 530        "SEC": "SECOND",
 531        "SECONDS": "SECOND",
 532        "SECS": "SECOND",
 533        "MS": "MILLISECOND",
 534        "MSEC": "MILLISECOND",
 535        "MSECS": "MILLISECOND",
 536        "MSECOND": "MILLISECOND",
 537        "MSECONDS": "MILLISECOND",
 538        "MILLISEC": "MILLISECOND",
 539        "MILLISECS": "MILLISECOND",
 540        "MILLISECON": "MILLISECOND",
 541        "MILLISECONDS": "MILLISECOND",
 542        "US": "MICROSECOND",
 543        "USEC": "MICROSECOND",
 544        "USECS": "MICROSECOND",
 545        "MICROSEC": "MICROSECOND",
 546        "MICROSECS": "MICROSECOND",
 547        "USECOND": "MICROSECOND",
 548        "USECONDS": "MICROSECOND",
 549        "MICROSECONDS": "MICROSECOND",
 550        "NS": "NANOSECOND",
 551        "NSEC": "NANOSECOND",
 552        "NANOSEC": "NANOSECOND",
 553        "NSECOND": "NANOSECOND",
 554        "NSECONDS": "NANOSECOND",
 555        "NANOSECS": "NANOSECOND",
 556        "EPOCH_SECOND": "EPOCH",
 557        "EPOCH_SECONDS": "EPOCH",
 558        "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
 559        "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
 560        "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
 561        "TZH": "TIMEZONE_HOUR",
 562        "TZM": "TIMEZONE_MINUTE",
 563        "DEC": "DECADE",
 564        "DECS": "DECADE",
 565        "DECADES": "DECADE",
 566        "MIL": "MILLENIUM",
 567        "MILS": "MILLENIUM",
 568        "MILLENIA": "MILLENIUM",
 569        "C": "CENTURY",
 570        "CENT": "CENTURY",
 571        "CENTS": "CENTURY",
 572        "CENTURIES": "CENTURY",
 573    }
 574
 575    TYPE_TO_EXPRESSIONS: t.Dict[exp.DataType.Type, t.Set[t.Type[exp.Expression]]] = {
 576        exp.DataType.Type.BIGINT: {
 577            exp.ApproxDistinct,
 578            exp.ArraySize,
 579            exp.Length,
 580        },
 581        exp.DataType.Type.BOOLEAN: {
 582            exp.Between,
 583            exp.Boolean,
 584            exp.In,
 585            exp.RegexpLike,
 586        },
 587        exp.DataType.Type.DATE: {
 588            exp.CurrentDate,
 589            exp.Date,
 590            exp.DateFromParts,
 591            exp.DateStrToDate,
 592            exp.DiToDate,
 593            exp.StrToDate,
 594            exp.TimeStrToDate,
 595            exp.TsOrDsToDate,
 596        },
 597        exp.DataType.Type.DATETIME: {
 598            exp.CurrentDatetime,
 599            exp.Datetime,
 600            exp.DatetimeAdd,
 601            exp.DatetimeSub,
 602        },
 603        exp.DataType.Type.DOUBLE: {
 604            exp.ApproxQuantile,
 605            exp.Avg,
 606            exp.Exp,
 607            exp.Ln,
 608            exp.Log,
 609            exp.Pow,
 610            exp.Quantile,
 611            exp.Round,
 612            exp.SafeDivide,
 613            exp.Sqrt,
 614            exp.Stddev,
 615            exp.StddevPop,
 616            exp.StddevSamp,
 617            exp.ToDouble,
 618            exp.Variance,
 619            exp.VariancePop,
 620        },
 621        exp.DataType.Type.INT: {
 622            exp.Ceil,
 623            exp.DatetimeDiff,
 624            exp.DateDiff,
 625            exp.TimestampDiff,
 626            exp.TimeDiff,
 627            exp.DateToDi,
 628            exp.Levenshtein,
 629            exp.Sign,
 630            exp.StrPosition,
 631            exp.TsOrDiToDi,
 632        },
 633        exp.DataType.Type.JSON: {
 634            exp.ParseJSON,
 635        },
 636        exp.DataType.Type.TIME: {
 637            exp.Time,
 638        },
 639        exp.DataType.Type.TIMESTAMP: {
 640            exp.CurrentTime,
 641            exp.CurrentTimestamp,
 642            exp.StrToTime,
 643            exp.TimeAdd,
 644            exp.TimeStrToTime,
 645            exp.TimeSub,
 646            exp.TimestampAdd,
 647            exp.TimestampSub,
 648            exp.UnixToTime,
 649        },
 650        exp.DataType.Type.TINYINT: {
 651            exp.Day,
 652            exp.Month,
 653            exp.Week,
 654            exp.Year,
 655            exp.Quarter,
 656        },
 657        exp.DataType.Type.VARCHAR: {
 658            exp.ArrayConcat,
 659            exp.Concat,
 660            exp.ConcatWs,
 661            exp.DateToDateStr,
 662            exp.GroupConcat,
 663            exp.Initcap,
 664            exp.Lower,
 665            exp.Substring,
 666            exp.String,
 667            exp.TimeToStr,
 668            exp.TimeToTimeStr,
 669            exp.Trim,
 670            exp.TsOrDsToDateStr,
 671            exp.UnixToStr,
 672            exp.UnixToTimeStr,
 673            exp.Upper,
 674        },
 675    }
 676
 677    ANNOTATORS: AnnotatorsType = {
 678        **{
 679            expr_type: lambda self, e: self._annotate_unary(e)
 680            for expr_type in subclasses(exp.__name__, (exp.Unary, exp.Alias))
 681        },
 682        **{
 683            expr_type: lambda self, e: self._annotate_binary(e)
 684            for expr_type in subclasses(exp.__name__, exp.Binary)
 685        },
 686        **{
 687            expr_type: _annotate_with_type_lambda(data_type)
 688            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
 689            for expr_type in expressions
 690        },
 691        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
 692        exp.Anonymous: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.UNKNOWN),
 693        exp.Array: lambda self, e: self._annotate_by_args(e, "expressions", array=True),
 694        exp.ArrayAgg: lambda self, e: self._annotate_by_args(e, "this", array=True),
 695        exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 696        exp.Bracket: lambda self, e: self._annotate_bracket(e),
 697        exp.Cast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
 698        exp.Case: lambda self, e: self._annotate_by_args(e, "default", "ifs"),
 699        exp.Coalesce: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 700        exp.Count: lambda self, e: self._annotate_with_type(
 701            e, exp.DataType.Type.BIGINT if e.args.get("big_int") else exp.DataType.Type.INT
 702        ),
 703        exp.DataType: lambda self, e: self._annotate_with_type(e, e.copy()),
 704        exp.DateAdd: lambda self, e: self._annotate_timeunit(e),
 705        exp.DateSub: lambda self, e: self._annotate_timeunit(e),
 706        exp.DateTrunc: lambda self, e: self._annotate_timeunit(e),
 707        exp.Distinct: lambda self, e: self._annotate_by_args(e, "expressions"),
 708        exp.Div: lambda self, e: self._annotate_div(e),
 709        exp.Dot: lambda self, e: self._annotate_dot(e),
 710        exp.Explode: lambda self, e: self._annotate_explode(e),
 711        exp.Extract: lambda self, e: self._annotate_extract(e),
 712        exp.Filter: lambda self, e: self._annotate_by_args(e, "this"),
 713        exp.GenerateDateArray: lambda self, e: self._annotate_with_type(
 714            e, exp.DataType.build("ARRAY<DATE>")
 715        ),
 716        exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
 717            e, exp.DataType.build("ARRAY<TIMESTAMP>")
 718        ),
 719        exp.Greatest: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 720        exp.If: lambda self, e: self._annotate_by_args(e, "true", "false"),
 721        exp.Interval: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.INTERVAL),
 722        exp.Least: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 723        exp.Literal: lambda self, e: self._annotate_literal(e),
 724        exp.Map: lambda self, e: self._annotate_map(e),
 725        exp.Max: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 726        exp.Min: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 727        exp.Null: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.NULL),
 728        exp.Nullif: lambda self, e: self._annotate_by_args(e, "this", "expression"),
 729        exp.PropertyEQ: lambda self, e: self._annotate_by_args(e, "expression"),
 730        exp.Slice: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.UNKNOWN),
 731        exp.Struct: lambda self, e: self._annotate_struct(e),
 732        exp.Sum: lambda self, e: self._annotate_by_args(e, "this", "expressions", promote=True),
 733        exp.Timestamp: lambda self, e: self._annotate_with_type(
 734            e,
 735            exp.DataType.Type.TIMESTAMPTZ if e.args.get("with_tz") else exp.DataType.Type.TIMESTAMP,
 736        ),
 737        exp.ToMap: lambda self, e: self._annotate_to_map(e),
 738        exp.TryCast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
 739        exp.Unnest: lambda self, e: self._annotate_unnest(e),
 740        exp.VarMap: lambda self, e: self._annotate_map(e),
 741    }
 742
 743    @classmethod
 744    def get_or_raise(cls, dialect: DialectType) -> Dialect:
 745        """
 746        Look up a dialect in the global dialect registry and return it if it exists.
 747
 748        Args:
 749            dialect: The target dialect. If this is a string, it can be optionally followed by
 750                additional key-value pairs that are separated by commas and are used to specify
 751                dialect settings, such as whether the dialect's identifiers are case-sensitive.
 752
 753        Example:
 754            >>> dialect = Dialect.get_or_raise("duckdb")
 755            >>> dialect = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
 756
 757        Returns:
 758            The corresponding Dialect instance.
 759        """
 760
 761        if not dialect:
 762            return cls()
 763        if isinstance(dialect, _Dialect):
 764            return dialect()
 765        if isinstance(dialect, Dialect):
 766            return dialect
 767        if isinstance(dialect, str):
 768            try:
 769                dialect_name, *kv_strings = dialect.split(",")
 770                kv_pairs = (kv.split("=") for kv in kv_strings)
 771                kwargs = {}
 772                for pair in kv_pairs:
 773                    key = pair[0].strip()
 774                    value: t.Union[bool, str, None] = None
 775
 776                    if len(pair) == 1:
 777                        # Default initialize standalone settings to True
 778                        value = True
 779                    elif len(pair) == 2:
 780                        value = pair[1].strip()
 781
 782                    kwargs[key] = to_bool(value)
 783
 784            except ValueError:
 785                raise ValueError(
 786                    f"Invalid dialect format: '{dialect}'. "
 787                    "Please use the correct format: 'dialect [, k1 = v1 [, ...]]'."
 788                )
 789
 790            result = cls.get(dialect_name.strip())
 791            if not result:
 792                from difflib import get_close_matches
 793
 794                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
 795                if similar:
 796                    similar = f" Did you mean {similar}?"
 797
 798                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
 799
 800            return result(**kwargs)
 801
 802        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")
 803
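    # A sketch of the settings syntax handled above: key-value pairs after the
    # dialect name are stored on the instance, and bare keys default to True
    # ("some_flag" is a hypothetical setting).
    #
    #     >>> d = Dialect.get_or_raise("duckdb, some_flag")
    #     >>> d.settings
    #     {'some_flag': True}
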
 804    @classmethod
 805    def format_time(
 806        cls, expression: t.Optional[str | exp.Expression]
 807    ) -> t.Optional[exp.Expression]:
 808        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
 809        if isinstance(expression, str):
 810            return exp.Literal.string(
 811                # the time formats are quoted
 812                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
 813            )
 814
 815        if expression and expression.is_string:
 816            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
 817
 818        return expression
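
    # For instance (a small sketch, assuming Hive's TIME_MAPPING covers these
    # tokens), a quoted dialect-specific format is translated to strftime:
    #
    #     >>> Dialect["hive"].format_time("'yyyy-MM-dd'").sql()
    #     "'%Y-%m-%d'"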
 819
 820    def __init__(self, **kwargs) -> None:
 821        normalization_strategy = kwargs.pop("normalization_strategy", None)
 822
 823        if normalization_strategy is None:
 824            self.normalization_strategy = self.NORMALIZATION_STRATEGY
 825        else:
 826            self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper())
 827
 828        self.settings = kwargs
 829
 830    def __eq__(self, other: t.Any) -> bool:
 831        # Does not currently take dialect state into account
 832        return type(self) == other
 833
 834    def __hash__(self) -> int:
 835        # Does not currently take dialect state into account
 836        return hash(type(self))
 837
 838    def normalize_identifier(self, expression: E) -> E:
 839        """
 840        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
 841
 842        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
 843        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
 844        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
 845        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
 846
 847        There are also dialects like Spark, which are case-insensitive even when quotes are
 848        present, and dialects like MySQL, whose resolution rules match those employed by the
 849        underlying operating system; for example, they may always be case-sensitive on Linux.
 850
 851        Finally, the normalization behavior of some engines can even be controlled through flags,
 852        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
 853
 854        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
 855        that it can analyze queries in the optimizer and successfully capture their semantics.
 856        """
 857        if (
 858            isinstance(expression, exp.Identifier)
 859            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
 860            and (
 861                not expression.quoted
 862                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
 863            )
 864        ):
 865            expression.set(
 866                "this",
 867                (
 868                    expression.this.upper()
 869                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
 870                    else expression.this.lower()
 871                ),
 872            )
 873
 874        return expression
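
    # E.g. (an illustrative sketch of the behaviors described above):
    #
    #     >>> Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO")).name
    #     'foo'
    #     >>> Dialect.get_or_raise("snowflake").normalize_identifier(exp.to_identifier("FoO")).name
    #     'FOO'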
 875
 876    def case_sensitive(self, text: str) -> bool:
 877        """Checks if text contains any case sensitive characters, based on the dialect's rules."""
 878        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
 879            return False
 880
 881        unsafe = (
 882            str.islower
 883            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
 884            else str.isupper
 885        )
 886        return any(unsafe(char) for char in text)
 887
 888    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
 889        """Checks if text can be identified given an identify option.
 890
 891        Args:
 892            text: The text to check.
 893            identify:
 894                `"always"` or `True`: Always returns `True`.
 895                `"safe"`: Only returns `True` if the identifier is case-insensitive.
 896
 897        Returns:
 898            Whether the given text can be identified.
 899        """
 900        if identify is True or identify == "always":
 901            return True
 902
 903        if identify == "safe":
 904            return not self.case_sensitive(text)
 905
 906        return False
 907
 908    def quote_identifier(self, expression: E, identify: bool = True) -> E:
 909        """
 910        Adds quotes to a given identifier.
 911
 912        Args:
 913            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
 914            identify: If set to `False`, the quotes will only be added if the identifier is deemed
 915                "unsafe", with respect to its characters and this dialect's normalization strategy.
 916        """
 917        if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func):
 918            name = expression.this
 919            expression.set(
 920                "quoted",
 921                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
 922            )
 923
 924        return expression
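
    # A short sketch of the effect: with identify=False, quotes are added only when
    # the name is "unsafe" for this dialect (case-sensitive characters, or characters
    # that fail SAFE_IDENTIFIER_RE).
    #
    #     >>> d = Dialect.get_or_raise("postgres")
    #     >>> d.quote_identifier(exp.to_identifier("foo"), identify=False).sql()
    #     'foo'
    #     >>> d.quote_identifier(exp.to_identifier("Foo"), identify=False).sql()
    #     '"Foo"'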
 925
 926    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
 927        if isinstance(path, exp.Literal):
 928            path_text = path.name
 929            if path.is_number:
 930                path_text = f"[{path_text}]"
 931            try:
 932                return parse_json_path(path_text, self)
 933            except ParseError as e:
 934                if self.STRICT_JSON_PATH_SYNTAX:
 935                    logger.warning(f"Invalid JSON path syntax. {str(e)}")
 936
 937        return path
 938
 939    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
 940        return self.parser(**opts).parse(self.tokenize(sql), sql)
 941
 942    def parse_into(
 943        self, expression_type: exp.IntoType, sql: str, **opts
 944    ) -> t.List[t.Optional[exp.Expression]]:
 945        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
 946
 947    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
 948        return self.generator(**opts).generate(expression, copy=copy)
 949
 950    def transpile(self, sql: str, **opts) -> t.List[str]:
 951        return [
 952            self.generate(expression, copy=False, **opts) if expression else ""
 953            for expression in self.parse(sql)
 954        ]
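
    # E.g. a round trip through parse + generate within one dialect (illustrative):
    #
    #     >>> Dialect.get_or_raise("duckdb").transpile("select 1")
    #     ['SELECT 1']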
 955
 956    def tokenize(self, sql: str) -> t.List[Token]:
 957        return self.tokenizer.tokenize(sql)
 958
 959    @property
 960    def tokenizer(self) -> Tokenizer:
 961        return self.tokenizer_class(dialect=self)
 962
 963    @property
 964    def jsonpath_tokenizer(self) -> JSONPathTokenizer:
 965        return self.jsonpath_tokenizer_class(dialect=self)
 966
 967    def parser(self, **opts) -> Parser:
 968        return self.parser_class(dialect=self, **opts)
 969
 970    def generator(self, **opts) -> Generator:
 971        return self.generator_class(dialect=self, **opts)
 972
 973
 974DialectType = t.Union[str, Dialect, t.Type[Dialect], None]
 975
 976
 977def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
 978    return lambda self, expression: self.func(name, *flatten(expression.args.values()))
 979
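# Typical use is as a value in a dialect Generator's TRANSFORMS mapping; a sketch
# of the common pattern (not any specific dialect's code):
#
#     TRANSFORMS = {
#         **Generator.TRANSFORMS,
#         exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
#     }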
 980
 981@unsupported_args("accuracy")
 982def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
 983    return self.func("APPROX_COUNT_DISTINCT", expression.this)
 984
 985
 986def if_sql(
 987    name: str = "IF", false_value: t.Optional[exp.Expression | str] = None
 988) -> t.Callable[[Generator, exp.If], str]:
 989    def _if_sql(self: Generator, expression: exp.If) -> str:
 990        return self.func(
 991            name,
 992            expression.this,
 993            expression.args.get("true"),
 994            expression.args.get("false") or false_value,
 995        )
 996
 997    return _if_sql
 998
 999
1000def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
1001    this = expression.this
1002    if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string:
1003        this.replace(exp.cast(this, exp.DataType.Type.JSON))
1004
1005    return self.binary(expression, "->" if isinstance(expression, exp.JSONExtract) else "->>")
1006
1007
1008def inline_array_sql(self: Generator, expression: exp.Array) -> str:
1009    return f"[{self.expressions(expression, dynamic=True, new_line=True, skip_first=True, skip_last=True)}]"
1010
1011
1012def inline_array_unless_query(self: Generator, expression: exp.Array) -> str:
1013    elem = seq_get(expression.expressions, 0)
1014    if isinstance(elem, exp.Expression) and elem.find(exp.Query):
1015        return self.func("ARRAY", elem)
1016    return inline_array_sql(self, expression)
1017
1018
1019def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
1020    return self.like_sql(
1021        exp.Like(
1022            this=exp.Lower(this=expression.this), expression=exp.Lower(this=expression.expression)
1023        )
1024    )
1025
1026
1027def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
1028    zone = self.sql(expression, "this")
1029    return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE"
1030
1031
1032def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
1033    if expression.args.get("recursive"):
1034        self.unsupported("Recursive CTEs are unsupported")
1035        expression.args["recursive"] = False
1036    return self.with_sql(expression)
1037
1038
1039def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide, if_sql: str = "IF") -> str:
1040    n = self.sql(expression, "this")
1041    d = self.sql(expression, "expression")
1042    return f"{if_sql}(({d}) <> 0, ({n}) / ({d}), NULL)"
1043
1044
1045def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
1046    self.unsupported("TABLESAMPLE unsupported")
1047    return self.sql(expression.this)
1048
1049
1050def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
1051    self.unsupported("PIVOT unsupported")
1052    return ""
1053
1054
1055def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
1056    return self.cast_sql(expression)
1057
1058
1059def no_comment_column_constraint_sql(
1060    self: Generator, expression: exp.CommentColumnConstraint
1061) -> str:
1062    self.unsupported("CommentColumnConstraint unsupported")
1063    return ""
1064
1065
1066def no_map_from_entries_sql(self: Generator, expression: exp.MapFromEntries) -> str:
1067    self.unsupported("MAP_FROM_ENTRIES unsupported")
1068    return ""
1069
1070
1071def property_sql(self: Generator, expression: exp.Property) -> str:
1072    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"
1073
1074
1075def str_position_sql(
1076    self: Generator,
1077    expression: exp.StrPosition,
1078    generate_instance: bool = False,
1079    str_position_func_name: str = "STRPOS",
1080) -> str:
1081    this = self.sql(expression, "this")
1082    substr = self.sql(expression, "substr")
1083    position = self.sql(expression, "position")
1084    instance = expression.args.get("instance") if generate_instance else None
1085    position_offset = ""
1086
1087    if position:
1088        # Normalize third 'pos' argument into 'SUBSTR(..) + offset' across dialects
1089        this = self.func("SUBSTR", this, position)
1090        position_offset = f" + {position} - 1"
1091
1092    return self.func(str_position_func_name, this, substr, instance) + position_offset
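
# So, e.g., StrPosition(this=haystack, substr='x', position=3) renders as (sketch):
#
#     STRPOS(SUBSTR(haystack, 3), 'x') + 3 - 1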
1093
1094
1095def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
1096    return (
1097        f"{self.sql(expression, 'this')}.{self.sql(exp.to_identifier(expression.expression.name))}"
1098    )
1099
1100
1101def var_map_sql(
1102    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
1103) -> str:
1104    keys = expression.args["keys"]
1105    values = expression.args["values"]
1106
1107    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
1108        self.unsupported("Cannot convert array columns into map.")
1109        return self.func(map_func_name, keys, values)
1110
1111    args = []
1112    for key, value in zip(keys.expressions, values.expressions):
1113        args.append(self.sql(key))
1114        args.append(self.sql(value))
1115
1116    return self.func(map_func_name, *args)
1117
1118
1119def build_formatted_time(
1120    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
1121) -> t.Callable[[t.List], E]:
1122    """Helper used for time expressions.
1123
1124    Args:
1125        exp_class: the expression class to instantiate.
1126        dialect: target sql dialect.
1127        default: the default format; True means the dialect's default TIME_FORMAT.
1128
1129    Returns:
1130        A callable that can be used to return the appropriately formatted time expression.
1131    """
1132
1133    def _builder(args: t.List):
1134        return exp_class(
1135            this=seq_get(args, 0),
1136            format=Dialect[dialect].format_time(
1137                seq_get(args, 1)
1138                or (Dialect[dialect].TIME_FORMAT if default is True else default or None)
1139            ),
1140        )
1141
1142    return _builder
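
# Typically installed in a parser's FUNCTIONS table; a sketch of the common pattern
# (as used by Hive-like dialects):
#
#     FUNCTIONS = {
#         "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "hive"),
#     }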
1143
1144
1145def time_format(
1146    dialect: DialectType = None,
1147) -> t.Callable[[Generator, exp.UnixToStr | exp.StrToUnix], t.Optional[str]]:
1148    def _time_format(self: Generator, expression: exp.UnixToStr | exp.StrToUnix) -> t.Optional[str]:
1149        """
1150        Returns the time format for a given expression, unless it's equivalent
1151        to the default time format of the dialect of interest.
1152        """
1153        time_format = self.format_time(expression)
1154        return time_format if time_format != Dialect.get_or_raise(dialect).TIME_FORMAT else None
1155
1156    return _time_format
1157
1158
1159def build_date_delta(
1160    exp_class: t.Type[E],
1161    unit_mapping: t.Optional[t.Dict[str, str]] = None,
1162    default_unit: t.Optional[str] = "DAY",
1163) -> t.Callable[[t.List], E]:
1164    def _builder(args: t.List) -> E:
1165        unit_based = len(args) == 3
1166        this = args[2] if unit_based else seq_get(args, 0)
1167        unit = None
1168        if unit_based or default_unit:
1169            unit = args[0] if unit_based else exp.Literal.string(default_unit)
1170            unit = exp.var(unit_mapping.get(unit.name.lower(), unit.name)) if unit_mapping else unit
1171        return exp_class(this=this, expression=seq_get(args, 1), unit=unit)
1172
1173    return _builder
1174
1175
1176def build_date_delta_with_interval(
1177    expression_class: t.Type[E],
1178) -> t.Callable[[t.List], t.Optional[E]]:
1179    def _builder(args: t.List) -> t.Optional[E]:
1180        if len(args) < 2:
1181            return None
1182
1183        interval = args[1]
1184
1185        if not isinstance(interval, exp.Interval):
1186            raise ParseError(f"INTERVAL expression expected but got '{interval}'")
1187
1188        return expression_class(this=args[0], expression=interval.this, unit=unit_to_str(interval))
1189
1190    return _builder
1191
1192
1193def date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
1194    unit = seq_get(args, 0)
1195    this = seq_get(args, 1)
1196
1197    if isinstance(this, exp.Cast) and this.is_type("date"):
1198        return exp.DateTrunc(unit=unit, this=this)
1199    return exp.TimestampTrunc(this=this, unit=unit)
1200
1201
1202def date_add_interval_sql(
1203    data_type: str, kind: str
1204) -> t.Callable[[Generator, exp.Expression], str]:
1205    def func(self: Generator, expression: exp.Expression) -> str:
1206        this = self.sql(expression, "this")
1207        interval = exp.Interval(this=expression.expression, unit=unit_to_var(expression))
1208        return f"{data_type}_{kind}({this}, {self.sql(interval)})"
1209
1210    return func
1211
1212
1213def timestamptrunc_sql(zone: bool = False) -> t.Callable[[Generator, exp.TimestampTrunc], str]:
1214    def _timestamptrunc_sql(self: Generator, expression: exp.TimestampTrunc) -> str:
1215        args = [unit_to_str(expression), expression.this]
1216        if zone:
1217            args.append(expression.args.get("zone"))
1218        return self.func("DATE_TRUNC", *args)
1219
1220    return _timestamptrunc_sql
1221
1222
1223def no_timestamp_sql(self: Generator, expression: exp.Timestamp) -> str:
1224    zone = expression.args.get("zone")
1225    if not zone:
1226        from sqlglot.optimizer.annotate_types import annotate_types
1227
1228        target_type = annotate_types(expression).type or exp.DataType.Type.TIMESTAMP
1229        return self.sql(exp.cast(expression.this, target_type))
1230    if zone.name.lower() in TIMEZONES:
1231        return self.sql(
1232            exp.AtTimeZone(
1233                this=exp.cast(expression.this, exp.DataType.Type.TIMESTAMP),
1234                zone=zone,
1235            )
1236        )
1237    return self.func("TIMESTAMP", expression.this, zone)
1238
1239
1240def no_time_sql(self: Generator, expression: exp.Time) -> str:
1241    # Transpile BQ's TIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIME)
1242    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)
1243    expr = exp.cast(
1244        exp.AtTimeZone(this=this, zone=expression.args.get("zone")), exp.DataType.Type.TIME
1245    )
1246    return self.sql(expr)
1247
1248
1249def no_datetime_sql(self: Generator, expression: exp.Datetime) -> str:
1250    this = expression.this
1251    expr = expression.expression
1252
1253    if expr.name.lower() in TIMEZONES:
1254        # Transpile BQ's DATETIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIMESTAMP)
1255        this = exp.cast(this, exp.DataType.Type.TIMESTAMPTZ)
1256        this = exp.cast(exp.AtTimeZone(this=this, zone=expr), exp.DataType.Type.TIMESTAMP)
1257        return self.sql(this)
1258
1259    this = exp.cast(this, exp.DataType.Type.DATE)
1260    expr = exp.cast(expr, exp.DataType.Type.TIME)
1261
1262    return self.sql(exp.cast(exp.Add(this=this, expression=expr), exp.DataType.Type.TIMESTAMP))
1263
1264
1265def locate_to_strposition(args: t.List) -> exp.Expression:
1266    return exp.StrPosition(
1267        this=seq_get(args, 1), substr=seq_get(args, 0), position=seq_get(args, 2)
1268    )
1269
1270
1271def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
1272    return self.func(
1273        "LOCATE", expression.args.get("substr"), expression.this, expression.args.get("position")
1274    )
1275
1276
1277def left_to_substring_sql(self: Generator, expression: exp.Left) -> str:
1278    return self.sql(
1279        exp.Substring(
1280            this=expression.this, start=exp.Literal.number(1), length=expression.expression
1281        )
1282    )
1283
1284
1285def right_to_substring_sql(self: Generator, expression: exp.Right) -> str:
1286    return self.sql(
1287        exp.Substring(
1288            this=expression.this,
1289            start=exp.Length(this=expression.this) - exp.paren(expression.expression - 1),
1290        )
1291    )
1292
1293
1294def timestrtotime_sql(
1295    self: Generator,
1296    expression: exp.TimeStrToTime,
1297    include_precision: bool = False,
1298) -> str:
1299    datatype = exp.DataType.build(
1300        exp.DataType.Type.TIMESTAMPTZ
1301        if expression.args.get("zone")
1302        else exp.DataType.Type.TIMESTAMP
1303    )
1304
1305    if isinstance(expression.this, exp.Literal) and include_precision:
1306        precision = subsecond_precision(expression.this.name)
1307        if precision > 0:
1308            datatype = exp.DataType.build(
1309                datatype.this, expressions=[exp.DataTypeParam(this=exp.Literal.number(precision))]
1310            )
1311
1312    return self.sql(exp.cast(expression.this, datatype, dialect=self.dialect))
1313
1314
1315def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
1316    return self.sql(exp.cast(expression.this, exp.DataType.Type.DATE))
1317
1318
1319# Used for Presto and DuckDB, which use functions that don't support charset and assume UTF-8
1320def encode_decode_sql(
1321    self: Generator, expression: exp.Expression, name: str, replace: bool = True
1322) -> str:
1323    charset = expression.args.get("charset")
1324    if charset and charset.name.lower() != "utf-8":
1325        self.unsupported(f"Expected utf-8 character set, got {charset}.")
1326
1327    return self.func(name, expression.this, expression.args.get("replace") if replace else None)
1328
1329
1330def min_or_least(self: Generator, expression: exp.Min) -> str:
1331    name = "LEAST" if expression.expressions else "MIN"
1332    return rename_func(name)(self, expression)
1333
1334
1335def max_or_greatest(self: Generator, expression: exp.Max) -> str:
1336    name = "GREATEST" if expression.expressions else "MAX"
1337    return rename_func(name)(self, expression)
1338
1339
1340def count_if_to_sum(self: Generator, expression: exp.CountIf) -> str:
1341    cond = expression.this
1342
1343    if isinstance(expression.this, exp.Distinct):
1344        cond = expression.this.expressions[0]
1345        self.unsupported("DISTINCT is not supported when converting COUNT_IF to SUM")
1346
1347    return self.func("sum", exp.func("if", cond, 1, 0))
1348
1349
1350def trim_sql(self: Generator, expression: exp.Trim) -> str:
1351    target = self.sql(expression, "this")
1352    trim_type = self.sql(expression, "position")
1353    remove_chars = self.sql(expression, "expression")
1354    collation = self.sql(expression, "collation")
1355
1356    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific
1357    if not remove_chars:
1358        return self.trim_sql(expression)
1359
1360    trim_type = f"{trim_type} " if trim_type else ""
1361    remove_chars = f"{remove_chars} " if remove_chars else ""
1362    from_part = "FROM " if trim_type or remove_chars else ""
1363    collation = f" COLLATE {collation}" if collation else ""
1364    return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"
1365
1366
1367def str_to_time_sql(self: Generator, expression: exp.Expression) -> str:
1368    return self.func("STRPTIME", expression.this, self.format_time(expression))
1369
1370
1371def concat_to_dpipe_sql(self: Generator, expression: exp.Concat) -> str:
1372    return self.sql(reduce(lambda x, y: exp.DPipe(this=x, expression=y), expression.expressions))
1373
1374
1375def concat_ws_to_dpipe_sql(self: Generator, expression: exp.ConcatWs) -> str:
1376    delim, *rest_args = expression.expressions
1377    return self.sql(
1378        reduce(
1379            lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)),
1380            rest_args,
1381        )
1382    )
1383
1384
1385@unsupported_args("position", "occurrence", "parameters")
1386def regexp_extract_sql(
1387    self: Generator, expression: exp.RegexpExtract | exp.RegexpExtractAll
1388) -> str:
1389    group = expression.args.get("group")
1390
1391    # Do not render group if it's the default value for this dialect
1392    if group and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP):
1393        group = None
1394
1395    return self.func(expression.sql_name(), expression.this, expression.expression, group)
1396
1397
1398@unsupported_args("position", "occurrence", "modifiers")
1399def regexp_replace_sql(self: Generator, expression: exp.RegexpReplace) -> str:
1400    return self.func(
1401        "REGEXP_REPLACE", expression.this, expression.expression, expression.args["replacement"]
1402    )
1403
1404
1405def pivot_column_names(aggregations: t.List[exp.Expression], dialect: DialectType) -> t.List[str]:
1406    names = []
1407    for agg in aggregations:
1408        if isinstance(agg, exp.Alias):
1409            names.append(agg.alias)
1410        else:
1411            """
1412            This case corresponds to aggregations without aliases being used as suffixes
1413            (e.g. col_avg(foo)). We need to unquote identifiers because they're going to
1414            be quoted in the base parser's `_parse_pivot` method, due to `to_identifier`.
1415            Otherwise, we'd end up with `col_avg(`foo`)` (notice the nested backticks).
1416            """
1417            agg_all_unquoted = agg.transform(
1418                lambda node: (
1419                    exp.Identifier(this=node.name, quoted=False)
1420                    if isinstance(node, exp.Identifier)
1421                    else node
1422                )
1423            )
1424            names.append(agg_all_unquoted.sql(dialect=dialect, normalize_functions="lower"))
1425
1426    return names
1427
1428
1429def binary_from_function(expr_type: t.Type[B]) -> t.Callable[[t.List], B]:
1430    return lambda args: expr_type(this=seq_get(args, 0), expression=seq_get(args, 1))
1431
1432
1433# Used to represent DATE_TRUNC in Doris, Postgres and Starrocks dialects
1434def build_timestamp_trunc(args: t.List) -> exp.TimestampTrunc:
1435    return exp.TimestampTrunc(this=seq_get(args, 1), unit=seq_get(args, 0))
1436
1437
1438def any_value_to_max_sql(self: Generator, expression: exp.AnyValue) -> str:
1439    return self.func("MAX", expression.this)
1440
1441
1442def bool_xor_sql(self: Generator, expression: exp.Xor) -> str:
1443    a = self.sql(expression.left)
1444    b = self.sql(expression.right)
1445    return f"({a} AND (NOT {b})) OR ((NOT {a}) AND {b})"
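# For illustration: rendering exp.Xor over columns a and b through bool_xor_sql yields
# "(a AND (NOT b)) OR ((NOT a) AND b)", i.e. XOR expanded into AND/OR/NOT.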
1446
1447
1448def is_parse_json(expression: exp.Expression) -> bool:
1449    return isinstance(expression, exp.ParseJSON) or (
1450        isinstance(expression, exp.Cast) and expression.is_type("json")
1451    )
1452
1453
1454def isnull_to_is_null(args: t.List) -> exp.Expression:
1455    return exp.Paren(this=exp.Is(this=seq_get(args, 0), expression=exp.null()))
1456
1457
1458def generatedasidentitycolumnconstraint_sql(
1459    self: Generator, expression: exp.GeneratedAsIdentityColumnConstraint
1460) -> str:
1461    start = self.sql(expression, "start") or "1"
1462    increment = self.sql(expression, "increment") or "1"
1463    return f"IDENTITY({start}, {increment})"
1464
1465
1466def arg_max_or_min_no_count(name: str) -> t.Callable[[Generator, exp.ArgMax | exp.ArgMin], str]:
1467    @unsupported_args("count")
1468    def _arg_max_or_min_sql(self: Generator, expression: exp.ArgMax | exp.ArgMin) -> str:
1469        return self.func(name, expression.this, expression.expression)
1470
1471    return _arg_max_or_min_sql
1472
1473
1474def ts_or_ds_add_cast(expression: exp.TsOrDsAdd) -> exp.TsOrDsAdd:
1475    this = expression.this.copy()
1476
1477    return_type = expression.return_type
1478    if return_type.is_type(exp.DataType.Type.DATE):
1479        # If we need to cast to a DATE, we cast to TIMESTAMP first to make sure we
1480        # can truncate timestamp strings, because some dialects can't cast them to DATE
1481        this = exp.cast(this, exp.DataType.Type.TIMESTAMP)
1482
1483    expression.this.replace(exp.cast(this, return_type))
1484    return expression
1485
1486
1487def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE_ADD_OR_DIFF], str]:
1488    def _delta_sql(self: Generator, expression: DATE_ADD_OR_DIFF) -> str:
1489        if cast and isinstance(expression, exp.TsOrDsAdd):
1490            expression = ts_or_ds_add_cast(expression)
1491
1492        return self.func(
1493            name,
1494            unit_to_var(expression),
1495            expression.expression,
1496            expression.this,
1497        )
1498
1499    return _delta_sql
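# For illustration: date_delta_sql("DATEDIFF") renders DATEDIFF(<unit>, <expression>, <this>);
# with cast=True, TsOrDsAdd operands are first wrapped by ts_or_ds_add_cast so string
# timestamps can be converted to the expected return type.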
1500
1501
1502def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
1503    unit = expression.args.get("unit")
1504
1505    if isinstance(unit, exp.Placeholder):
1506        return unit
1507    if unit:
1508        return exp.Literal.string(unit.name)
1509    return exp.Literal.string(default) if default else None
1510
1511
1512def unit_to_var(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
1513    unit = expression.args.get("unit")
1514
1515    if isinstance(unit, (exp.Var, exp.Placeholder)):
1516        return unit
1517    return exp.Var(this=default) if default else None
1518
1519
1520@t.overload
1521def map_date_part(part: exp.Expression, dialect: DialectType = Dialect) -> exp.Var:
1522    pass
1523
1524
1525@t.overload
1526def map_date_part(
1527    part: t.Optional[exp.Expression], dialect: DialectType = Dialect
1528) -> t.Optional[exp.Expression]:
1529    pass
1530
1531
1532def map_date_part(part, dialect: DialectType = Dialect):
1533    mapped = (
1534        Dialect.get_or_raise(dialect).DATE_PART_MAPPING.get(part.name.upper()) if part else None
1535    )
1536    return exp.var(mapped) if mapped else part
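# A minimal usage sketch (names below exist in this module; outputs follow
# DATE_PART_MAPPING):
#
#     from sqlglot import exp
#     from sqlglot.dialects.dialect import map_date_part
#
#     map_date_part(exp.var("YY")).name     # -> 'YEAR'
#     map_date_part(exp.var("EPOCH")).name  # -> 'EPOCH' (unmapped parts pass through)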
1537
1538
1539def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str:
1540    trunc_curr_date = exp.func("date_trunc", "month", expression.this)
1541    plus_one_month = exp.func("date_add", trunc_curr_date, 1, "month")
1542    minus_one_day = exp.func("date_sub", plus_one_month, 1, "day")
1543
1544    return self.sql(exp.cast(minus_one_day, exp.DataType.Type.DATE))
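# For illustration: LAST_DAY(d) is emulated as
# CAST(DATE_SUB(DATE_ADD(DATE_TRUNC('month', d), 1 month), 1 day) AS DATE),
# i.e. the first day of the next month minus one day.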
1545
1546
1547def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str:
1548    """Remove table refs from columns in when statements."""
1549    alias = expression.this.args.get("alias")
1550
1551    def normalize(identifier: t.Optional[exp.Identifier]) -> t.Optional[str]:
1552        return self.dialect.normalize_identifier(identifier).name if identifier else None
1553
1554    targets = {normalize(expression.this.this)}
1555
1556    if alias:
1557        targets.add(normalize(alias.this))
1558
1559    for when in expression.args["whens"].expressions:
1560        # only remove the target names from the THEN clause
1561        # they're still valid in the <condition> part of WHEN MATCHED / WHEN NOT MATCHED
1562        # ref: https://github.com/TobikoData/sqlmesh/issues/2934
1563        then = when.args.get("then")
1564        if then:
1565            then.transform(
1566                lambda node: (
1567                    exp.column(node.this)
1568                    if isinstance(node, exp.Column) and normalize(node.args.get("table")) in targets
1569                    else node
1570                ),
1571                copy=False,
1572            )
1573
1574    return self.merge_sql(expression)
1575
1576
1577def build_json_extract_path(
1578    expr_type: t.Type[F], zero_based_indexing: bool = True, arrow_req_json_type: bool = False
1579) -> t.Callable[[t.List], F]:
1580    def _builder(args: t.List) -> F:
1581        segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]
1582        for arg in args[1:]:
1583            if not isinstance(arg, exp.Literal):
1584                # We use the fallback parser because we can't really transpile non-literals safely
1585                return expr_type.from_arg_list(args)
1586
1587            text = arg.name
1588            if is_int(text):
1589                index = int(text)
1590                segments.append(
1591                    exp.JSONPathSubscript(this=index if zero_based_indexing else index - 1)
1592                )
1593            else:
1594                segments.append(exp.JSONPathKey(this=text))
1595
1596        # This is done to avoid failing in the expression validator due to the arg count
1597        del args[2:]
1598        return expr_type(
1599            this=seq_get(args, 0),
1600            expression=exp.JSONPath(expressions=segments),
1601            only_json_types=arrow_req_json_type,
1602        )
1603
1604    return _builder
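# A minimal usage sketch (the JSONPath rendering shown assumes the default generator):
#
#     from sqlglot import exp
#     from sqlglot.dialects.dialect import build_json_extract_path
#
#     builder = build_json_extract_path(exp.JSONExtract)
#     node = builder([exp.column("doc"), exp.Literal.string("a"), exp.Literal.number(0)])
#     node.expression.sql()  # e.g. -> '$.a[0]'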
1605
1606
1607def json_extract_segments(
1608    name: str, quoted_index: bool = True, op: t.Optional[str] = None
1609) -> t.Callable[[Generator, JSON_EXTRACT_TYPE], str]:
1610    def _json_extract_segments(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
1611        path = expression.expression
1612        if not isinstance(path, exp.JSONPath):
1613            return rename_func(name)(self, expression)
1614
1615        escape = path.args.get("escape")
1616
1617        segments = []
1618        for segment in path.expressions:
1619            path = self.sql(segment)
1620            if path:
1621                if isinstance(segment, exp.JSONPathPart) and (
1622                    quoted_index or not isinstance(segment, exp.JSONPathSubscript)
1623                ):
1624                    if escape:
1625                        path = self.escape_str(path)
1626
1627                    path = f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}"
1628
1629                segments.append(path)
1630
1631        if op:
1632            return f" {op} ".join([self.sql(expression.this), *segments])
1633        return self.func(name, expression.this, *segments)
1634
1635    return _json_extract_segments
1636
1637
1638def json_path_key_only_name(self: Generator, expression: exp.JSONPathKey) -> str:
1639    if isinstance(expression.this, exp.JSONPathWildcard):
1640        self.unsupported("Unsupported wildcard in JSONPathKey expression")
1641
1642    return expression.name
1643
1644
1645def filter_array_using_unnest(self: Generator, expression: exp.ArrayFilter) -> str:
1646    cond = expression.expression
1647    if isinstance(cond, exp.Lambda) and len(cond.expressions) == 1:
1648        alias = cond.expressions[0]
1649        cond = cond.this
1650    elif isinstance(cond, exp.Predicate):
1651        alias = "_u"
1652    else:
1653        self.unsupported("Unsupported filter condition")
1654        return ""
1655
1656    unnest = exp.Unnest(expressions=[expression.this])
1657    filtered = exp.select(alias).from_(exp.alias_(unnest, None, table=[alias])).where(cond)
1658    return self.sql(exp.Array(expressions=[filtered]))
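# For illustration: ARRAY_FILTER(arr, x -> x > 1) is rewritten into a subquery of the
# shape ARRAY(SELECT x FROM UNNEST(arr) ... WHERE x > 1), where the lambda variable is
# used as the column alias; plain predicates fall back to the synthetic alias _u.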
1659
1660
1661def to_number_with_nls_param(self: Generator, expression: exp.ToNumber) -> str:
1662    return self.func(
1663        "TO_NUMBER",
1664        expression.this,
1665        expression.args.get("format"),
1666        expression.args.get("nlsparam"),
1667    )
1668
1669
1670def build_default_decimal_type(
1671    precision: t.Optional[int] = None, scale: t.Optional[int] = None
1672) -> t.Callable[[exp.DataType], exp.DataType]:
1673    def _builder(dtype: exp.DataType) -> exp.DataType:
1674        if dtype.expressions or precision is None:
1675            return dtype
1676
1677        params = f"{precision}{f', {scale}' if scale is not None else ''}"
1678        return exp.DataType.build(f"DECIMAL({params})")
1679
1680    return _builder
1681
1682
1683def build_timestamp_from_parts(args: t.List) -> exp.Func:
1684    if len(args) == 2:
1685        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
1686        # so we parse this into Anonymous for now instead of introducing complexity
1687        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)
1688
1689    return exp.TimestampFromParts.from_arg_list(args)
1690
1691
1692def sha256_sql(self: Generator, expression: exp.SHA2) -> str:
1693    return self.func(f"SHA{expression.text('length') or '256'}", expression.this)
1694
1695
1696def sequence_sql(self: Generator, expression: exp.GenerateSeries | exp.GenerateDateArray) -> str:
1697    start = expression.args.get("start")
1698    end = expression.args.get("end")
1699    step = expression.args.get("step")
1700
1701    if isinstance(start, exp.Cast):
1702        target_type = start.to
1703    elif isinstance(end, exp.Cast):
1704        target_type = end.to
1705    else:
1706        target_type = None
1707
1708    if start and end and target_type and target_type.is_type("date", "timestamp"):
1709        if isinstance(start, exp.Cast) and target_type is start.to:
1710            end = exp.cast(end, target_type)
1711        else:
1712            start = exp.cast(start, target_type)
1713
1714    return self.func("SEQUENCE", start, end, step)
1715
1716
1717def build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
1718    def _builder(args: t.List, dialect: Dialect) -> E:
1719        return expr_type(
1720            this=seq_get(args, 0),
1721            expression=seq_get(args, 1),
1722            group=seq_get(args, 2) or exp.Literal.number(dialect.REGEXP_EXTRACT_DEFAULT_GROUP),
1723            parameters=seq_get(args, 3),
1724        )
1725
1726    return _builder
1727
1728
1729def explode_to_unnest_sql(self: Generator, expression: exp.Lateral) -> str:
1730    if isinstance(expression.this, exp.Explode):
1731        return self.sql(
1732            exp.Join(
1733                this=exp.Unnest(
1734                    expressions=[expression.this.this],
1735                    alias=expression.args.get("alias"),
1736                    offset=isinstance(expression.this, exp.Posexplode),
1737                ),
1738                kind="cross",
1739            )
1740        )
1741    return self.lateral_sql(expression)
1742
1743
1744def timestampdiff_sql(self: Generator, expression: exp.DatetimeDiff | exp.TimestampDiff) -> str:
1745    return self.func("TIMESTAMPDIFF", expression.unit, expression.expression, expression.this)
1746
1747
1748def no_make_interval_sql(self: Generator, expression: exp.MakeInterval, sep: str = ", ") -> str:
1749    args = []
1750    for unit, value in expression.args.items():
1751        if isinstance(value, exp.Kwarg):
1752            value = value.expression
1753
1754        args.append(f"{value} {unit}")
1755
1756    return f"INTERVAL '{self.format_args(*args, sep=sep)}'"
1757
1758
1759def length_or_char_length_sql(self: Generator, expression: exp.Length) -> str:
1760    length_func = "LENGTH" if expression.args.get("binary") else "CHAR_LENGTH"
1761    return self.func(length_func, expression.this)
logger = <Logger sqlglot (WARNING)>
UNESCAPED_SEQUENCES = {'\\a': '\x07', '\\b': '\x08', '\\f': '\x0c', '\\n': '\n', '\\r': '\r', '\\t': '\t', '\\v': '\x0b', '\\\\': '\\'}
class Dialects(builtins.str, enum.Enum):

Dialects supported by SQLGlot.

DIALECT = <Dialects.DIALECT: ''>
ATHENA = <Dialects.ATHENA: 'athena'>
BIGQUERY = <Dialects.BIGQUERY: 'bigquery'>
CLICKHOUSE = <Dialects.CLICKHOUSE: 'clickhouse'>
DATABRICKS = <Dialects.DATABRICKS: 'databricks'>
DORIS = <Dialects.DORIS: 'doris'>
DRILL = <Dialects.DRILL: 'drill'>
DUCKDB = <Dialects.DUCKDB: 'duckdb'>
HIVE = <Dialects.HIVE: 'hive'>
MATERIALIZE = <Dialects.MATERIALIZE: 'materialize'>
MYSQL = <Dialects.MYSQL: 'mysql'>
ORACLE = <Dialects.ORACLE: 'oracle'>
POSTGRES = <Dialects.POSTGRES: 'postgres'>
PRESTO = <Dialects.PRESTO: 'presto'>
PRQL = <Dialects.PRQL: 'prql'>
REDSHIFT = <Dialects.REDSHIFT: 'redshift'>
RISINGWAVE = <Dialects.RISINGWAVE: 'risingwave'>
SNOWFLAKE = <Dialects.SNOWFLAKE: 'snowflake'>
SPARK = <Dialects.SPARK: 'spark'>
SPARK2 = <Dialects.SPARK2: 'spark2'>
SQLITE = <Dialects.SQLITE: 'sqlite'>
STARROCKS = <Dialects.STARROCKS: 'starrocks'>
TABLEAU = <Dialects.TABLEAU: 'tableau'>
TERADATA = <Dialects.TERADATA: 'teradata'>
TRINO = <Dialects.TRINO: 'trino'>
TSQL = <Dialects.TSQL: 'tsql'>
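
Since the enum members are plain strings, they can be passed anywhere a dialect name is
expected. A minimal sketch (the output shown is an assumption):

    import sqlglot
    from sqlglot.dialects.dialect import Dialects

    sqlglot.transpile("SELECT 1", read=Dialects.MYSQL, write="duckdb")
    # e.g. -> ['SELECT 1']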
class NormalizationStrategy(builtins.str, sqlglot.helper.AutoName):

Specifies the strategy according to which identifiers should be normalized.

LOWERCASE = <NormalizationStrategy.LOWERCASE: 'LOWERCASE'>

Unquoted identifiers are lowercased.

UPPERCASE = <NormalizationStrategy.UPPERCASE: 'UPPERCASE'>

Unquoted identifiers are uppercased.

CASE_SENSITIVE = <NormalizationStrategy.CASE_SENSITIVE: 'CASE_SENSITIVE'>

Always case-sensitive, regardless of quotes.

CASE_INSENSITIVE = <NormalizationStrategy.CASE_INSENSITIVE: 'CASE_INSENSITIVE'>

Always case-insensitive, regardless of quotes.

class Dialect:
225class Dialect(metaclass=_Dialect):
226    INDEX_OFFSET = 0
227    """The base index offset for arrays."""
228
229    WEEK_OFFSET = 0
230    """First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday."""
231
232    UNNEST_COLUMN_ONLY = False
233    """Whether `UNNEST` table aliases are treated as column aliases."""
234
235    ALIAS_POST_TABLESAMPLE = False
236    """Whether the table alias comes after tablesample."""
237
238    TABLESAMPLE_SIZE_IS_PERCENT = False
239    """Whether a size in the table sample clause represents percentage."""
240
241    NORMALIZATION_STRATEGY = NormalizationStrategy.LOWERCASE
242    """Specifies the strategy according to which identifiers should be normalized."""
243
244    IDENTIFIERS_CAN_START_WITH_DIGIT = False
245    """Whether an unquoted identifier can start with a digit."""
246
247    DPIPE_IS_STRING_CONCAT = True
248    """Whether the DPIPE token (`||`) is a string concatenation operator."""
249
250    STRICT_STRING_CONCAT = False
251    """Whether `CONCAT`'s arguments must be strings."""
252
253    SUPPORTS_USER_DEFINED_TYPES = True
254    """Whether user-defined data types are supported."""
255
256    SUPPORTS_SEMI_ANTI_JOIN = True
257    """Whether `SEMI` or `ANTI` joins are supported."""
258
259    SUPPORTS_COLUMN_JOIN_MARKS = False
260    """Whether the old-style outer join (+) syntax is supported."""
261
262    COPY_PARAMS_ARE_CSV = True
263    """Separator of COPY statement parameters."""
264
265    NORMALIZE_FUNCTIONS: bool | str = "upper"
266    """
267    Determines how function names are going to be normalized.
268    Possible values:
269        "upper" or True: Convert names to uppercase.
270        "lower": Convert names to lowercase.
271        False: Disables function name normalization.
272    """
273
274    PRESERVE_ORIGINAL_NAMES: bool = False
275    """
276    Whether the name of the function should be preserved inside the node's metadata.
277    This can be useful for roundtripping deprecated vs new functions that share an
278    AST node, e.g. JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery.
279    """
280
281    LOG_BASE_FIRST: t.Optional[bool] = True
282    """
283    Whether the base comes first in the `LOG` function.
284    Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`)
285    """
286
287    NULL_ORDERING = "nulls_are_small"
288    """
289    Default `NULL` ordering method to use if not explicitly set.
290    Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"`
291    """
292
293    TYPED_DIVISION = False
294    """
295    Whether the behavior of `a / b` depends on the types of `a` and `b`.
296    False means `a / b` is always float division.
297    True means `a / b` is integer division if both `a` and `b` are integers.
298    """
299
300    SAFE_DIVISION = False
301    """Whether division by zero throws an error (`False`) or returns NULL (`True`)."""
302
303    CONCAT_COALESCE = False
304    """A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string."""
305
306    HEX_LOWERCASE = False
307    """Whether the `HEX` function returns a lowercase hexadecimal string."""
308
309    DATE_FORMAT = "'%Y-%m-%d'"
310    DATEINT_FORMAT = "'%Y%m%d'"
311    TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'"
312
313    TIME_MAPPING: t.Dict[str, str] = {}
314    """Associates this dialect's time formats with their equivalent Python `strftime` formats."""
315
316    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_model_rules_date_time
317    # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Data-Type-Conversions/Character-to-DATE-Conversion/Forcing-a-FORMAT-on-CAST-for-Converting-Character-to-DATE
318    FORMAT_MAPPING: t.Dict[str, str] = {}
319    """
320    Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`.
321    If empty, the corresponding trie will be constructed off of `TIME_MAPPING`.
322    """
323
324    UNESCAPED_SEQUENCES: t.Dict[str, str] = {}
325    """Mapping of an escaped sequence (`\\n`) to its unescaped version (`\n`)."""
326
327    PSEUDOCOLUMNS: t.Set[str] = set()
328    """
329    Columns that are auto-generated by the engine corresponding to this dialect.
330    For example, such columns may be excluded from `SELECT *` queries.
331    """
332
333    PREFER_CTE_ALIAS_COLUMN = False
334    """
335    Some dialects, such as Snowflake, allow you to reference a CTE column alias in the
336    HAVING clause of the CTE. This flag will cause the CTE alias columns to override
337    any projection aliases in the subquery.
338
339    For example,
340        WITH y(c) AS (
341            SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
342        ) SELECT c FROM y;
343
344        will be rewritten as
345
346        WITH y(c) AS (
347            SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
348        ) SELECT c FROM y;
349    """
350
351    COPY_PARAMS_ARE_CSV = True
352    """
353    Whether COPY statement parameters are separated by comma or whitespace
354    """
355
356    FORCE_EARLY_ALIAS_REF_EXPANSION = False
357    """
358    Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
359
360    For example:
361        WITH data AS (
362        SELECT
363            1 AS id,
364            2 AS my_id
365        )
366        SELECT
367            id AS my_id
368        FROM
369            data
370        WHERE
371            my_id = 1
372        GROUP BY
373            my_id
374        HAVING
375            my_id = 1
376
377    In most dialects, "my_id" would refer to "data.my_id" across the query, except:
378        - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e
379          it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
380        - Clickhouse, which will forward the alias across the query i.e it resolves
381        to "WHERE id = 1 GROUP BY id HAVING id = 1"
382    """
383
384    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = False
385    """Whether alias reference expansion before qualification should only happen for the GROUP BY clause."""
386
387    SUPPORTS_ORDER_BY_ALL = False
388    """
389    Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
390    """
391
392    HAS_DISTINCT_ARRAY_CONSTRUCTORS = False
393    """
394    Whether the ARRAY constructor is context-sensitive, i.e in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3)
395    as the former is of type INT[] vs the latter which is SUPER
396    """
397
398    SUPPORTS_FIXED_SIZE_ARRAYS = False
399    """
400    Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g.
401    in DuckDB. In dialects which don't support fixed size arrays such as Snowflake, this should
402    be interpreted as a subscript/index operator.
403    """
404
405    STRICT_JSON_PATH_SYNTAX = True
406    """Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning."""
407
408    ON_CONDITION_EMPTY_BEFORE_ERROR = True
409    """Whether "X ON EMPTY" should come before "X ON ERROR" (for dialects like T-SQL, MySQL, Oracle)."""
410
411    ARRAY_AGG_INCLUDES_NULLS: t.Optional[bool] = True
412    """Whether ArrayAgg needs to filter NULL values."""
413
414    PROMOTE_TO_INFERRED_DATETIME_TYPE = False
415    """
416    This flag is used in the optimizer's canonicalize rule and determines whether x will be promoted
417    to the literal's type in x::DATE < '2020-01-01 12:05:03' (i.e., DATETIME). When false, the literal
418    is cast to x's type to match it instead.
419    """
420
421    SUPPORTS_VALUES_DEFAULT = True
422    """Whether the DEFAULT keyword is supported in the VALUES clause."""
423
424    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = False
425    """Whether number literals can include underscores for better readability"""
426
427    REGEXP_EXTRACT_DEFAULT_GROUP = 0
428    """The default value for the capturing group."""
429
430    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
431        exp.Except: True,
432        exp.Intersect: True,
433        exp.Union: True,
434    }
435    """
436    Whether a set operation uses DISTINCT by default. This is `None` when either `DISTINCT` or `ALL`
437    must be explicitly specified.
438    """
439
440    CREATABLE_KIND_MAPPING: dict[str, str] = {}
441    """
442    Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse
443    equivalent of CREATE SCHEMA is CREATE DATABASE.
444    """
445
446    # --- Autofilled ---
447
448    tokenizer_class = Tokenizer
449    jsonpath_tokenizer_class = JSONPathTokenizer
450    parser_class = Parser
451    generator_class = Generator
452
453    # A trie of the time_mapping keys
454    TIME_TRIE: t.Dict = {}
455    FORMAT_TRIE: t.Dict = {}
456
457    INVERSE_TIME_MAPPING: t.Dict[str, str] = {}
458    INVERSE_TIME_TRIE: t.Dict = {}
459    INVERSE_FORMAT_MAPPING: t.Dict[str, str] = {}
460    INVERSE_FORMAT_TRIE: t.Dict = {}
461
462    INVERSE_CREATABLE_KIND_MAPPING: dict[str, str] = {}
463
464    ESCAPED_SEQUENCES: t.Dict[str, str] = {}
465
466    # Delimiters for string literals and identifiers
467    QUOTE_START = "'"
468    QUOTE_END = "'"
469    IDENTIFIER_START = '"'
470    IDENTIFIER_END = '"'
471
472    # Delimiters for bit, hex, byte and unicode literals
473    BIT_START: t.Optional[str] = None
474    BIT_END: t.Optional[str] = None
475    HEX_START: t.Optional[str] = None
476    HEX_END: t.Optional[str] = None
477    BYTE_START: t.Optional[str] = None
478    BYTE_END: t.Optional[str] = None
479    UNICODE_START: t.Optional[str] = None
480    UNICODE_END: t.Optional[str] = None
481
482    DATE_PART_MAPPING = {
483        "Y": "YEAR",
484        "YY": "YEAR",
485        "YYY": "YEAR",
486        "YYYY": "YEAR",
487        "YR": "YEAR",
488        "YEARS": "YEAR",
489        "YRS": "YEAR",
490        "MM": "MONTH",
491        "MON": "MONTH",
492        "MONS": "MONTH",
493        "MONTHS": "MONTH",
494        "D": "DAY",
495        "DD": "DAY",
496        "DAYS": "DAY",
497        "DAYOFMONTH": "DAY",
498        "DAY OF WEEK": "DAYOFWEEK",
499        "WEEKDAY": "DAYOFWEEK",
500        "DOW": "DAYOFWEEK",
501        "DW": "DAYOFWEEK",
502        "WEEKDAY_ISO": "DAYOFWEEKISO",
503        "DOW_ISO": "DAYOFWEEKISO",
504        "DW_ISO": "DAYOFWEEKISO",
505        "DAY OF YEAR": "DAYOFYEAR",
506        "DOY": "DAYOFYEAR",
507        "DY": "DAYOFYEAR",
508        "W": "WEEK",
509        "WK": "WEEK",
510        "WEEKOFYEAR": "WEEK",
511        "WOY": "WEEK",
512        "WY": "WEEK",
513        "WEEK_ISO": "WEEKISO",
514        "WEEKOFYEARISO": "WEEKISO",
515        "WEEKOFYEAR_ISO": "WEEKISO",
516        "Q": "QUARTER",
517        "QTR": "QUARTER",
518        "QTRS": "QUARTER",
519        "QUARTERS": "QUARTER",
520        "H": "HOUR",
521        "HH": "HOUR",
522        "HR": "HOUR",
523        "HOURS": "HOUR",
524        "HRS": "HOUR",
525        "M": "MINUTE",
526        "MI": "MINUTE",
527        "MIN": "MINUTE",
528        "MINUTES": "MINUTE",
529        "MINS": "MINUTE",
530        "S": "SECOND",
531        "SEC": "SECOND",
532        "SECONDS": "SECOND",
533        "SECS": "SECOND",
534        "MS": "MILLISECOND",
535        "MSEC": "MILLISECOND",
536        "MSECS": "MILLISECOND",
537        "MSECOND": "MILLISECOND",
538        "MSECONDS": "MILLISECOND",
539        "MILLISEC": "MILLISECOND",
540        "MILLISECS": "MILLISECOND",
541        "MILLISECON": "MILLISECOND",
542        "MILLISECONDS": "MILLISECOND",
543        "US": "MICROSECOND",
544        "USEC": "MICROSECOND",
545        "USECS": "MICROSECOND",
546        "MICROSEC": "MICROSECOND",
547        "MICROSECS": "MICROSECOND",
548        "USECOND": "MICROSECOND",
549        "USECONDS": "MICROSECOND",
550        "MICROSECONDS": "MICROSECOND",
551        "NS": "NANOSECOND",
552        "NSEC": "NANOSECOND",
553        "NANOSEC": "NANOSECOND",
554        "NSECOND": "NANOSECOND",
555        "NSECONDS": "NANOSECOND",
556        "NANOSECS": "NANOSECOND",
557        "EPOCH_SECOND": "EPOCH",
558        "EPOCH_SECONDS": "EPOCH",
559        "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
560        "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
561        "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
562        "TZH": "TIMEZONE_HOUR",
563        "TZM": "TIMEZONE_MINUTE",
564        "DEC": "DECADE",
565        "DECS": "DECADE",
566        "DECADES": "DECADE",
567        "MIL": "MILLENIUM",
568        "MILS": "MILLENIUM",
569        "MILLENIA": "MILLENIUM",
570        "C": "CENTURY",
571        "CENT": "CENTURY",
572        "CENTS": "CENTURY",
573        "CENTURIES": "CENTURY",
574    }
575
576    TYPE_TO_EXPRESSIONS: t.Dict[exp.DataType.Type, t.Set[t.Type[exp.Expression]]] = {
577        exp.DataType.Type.BIGINT: {
578            exp.ApproxDistinct,
579            exp.ArraySize,
580            exp.Length,
581        },
582        exp.DataType.Type.BOOLEAN: {
583            exp.Between,
584            exp.Boolean,
585            exp.In,
586            exp.RegexpLike,
587        },
588        exp.DataType.Type.DATE: {
589            exp.CurrentDate,
590            exp.Date,
591            exp.DateFromParts,
592            exp.DateStrToDate,
593            exp.DiToDate,
594            exp.StrToDate,
595            exp.TimeStrToDate,
596            exp.TsOrDsToDate,
597        },
598        exp.DataType.Type.DATETIME: {
599            exp.CurrentDatetime,
600            exp.Datetime,
601            exp.DatetimeAdd,
602            exp.DatetimeSub,
603        },
604        exp.DataType.Type.DOUBLE: {
605            exp.ApproxQuantile,
606            exp.Avg,
607            exp.Exp,
608            exp.Ln,
609            exp.Log,
610            exp.Pow,
611            exp.Quantile,
612            exp.Round,
613            exp.SafeDivide,
614            exp.Sqrt,
615            exp.Stddev,
616            exp.StddevPop,
617            exp.StddevSamp,
618            exp.ToDouble,
619            exp.Variance,
620            exp.VariancePop,
621        },
622        exp.DataType.Type.INT: {
623            exp.Ceil,
624            exp.DatetimeDiff,
625            exp.DateDiff,
626            exp.TimestampDiff,
627            exp.TimeDiff,
628            exp.DateToDi,
629            exp.Levenshtein,
630            exp.Sign,
631            exp.StrPosition,
632            exp.TsOrDiToDi,
633        },
634        exp.DataType.Type.JSON: {
635            exp.ParseJSON,
636        },
637        exp.DataType.Type.TIME: {
638            exp.Time,
639        },
640        exp.DataType.Type.TIMESTAMP: {
641            exp.CurrentTime,
642            exp.CurrentTimestamp,
643            exp.StrToTime,
644            exp.TimeAdd,
645            exp.TimeStrToTime,
646            exp.TimeSub,
647            exp.TimestampAdd,
648            exp.TimestampSub,
649            exp.UnixToTime,
650        },
651        exp.DataType.Type.TINYINT: {
652            exp.Day,
653            exp.Month,
654            exp.Week,
655            exp.Year,
656            exp.Quarter,
657        },
658        exp.DataType.Type.VARCHAR: {
659            exp.ArrayConcat,
660            exp.Concat,
661            exp.ConcatWs,
662            exp.DateToDateStr,
663            exp.GroupConcat,
664            exp.Initcap,
665            exp.Lower,
666            exp.Substring,
667            exp.String,
668            exp.TimeToStr,
669            exp.TimeToTimeStr,
670            exp.Trim,
671            exp.TsOrDsToDateStr,
672            exp.UnixToStr,
673            exp.UnixToTimeStr,
674            exp.Upper,
675        },
676    }
677
678    ANNOTATORS: AnnotatorsType = {
679        **{
680            expr_type: lambda self, e: self._annotate_unary(e)
681            for expr_type in subclasses(exp.__name__, (exp.Unary, exp.Alias))
682        },
683        **{
684            expr_type: lambda self, e: self._annotate_binary(e)
685            for expr_type in subclasses(exp.__name__, exp.Binary)
686        },
687        **{
688            expr_type: _annotate_with_type_lambda(data_type)
689            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
690            for expr_type in expressions
691        },
692        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
693        exp.Anonymous: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.UNKNOWN),
694        exp.Array: lambda self, e: self._annotate_by_args(e, "expressions", array=True),
695        exp.ArrayAgg: lambda self, e: self._annotate_by_args(e, "this", array=True),
696        exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
697        exp.Bracket: lambda self, e: self._annotate_bracket(e),
698        exp.Cast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
699        exp.Case: lambda self, e: self._annotate_by_args(e, "default", "ifs"),
700        exp.Coalesce: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
701        exp.Count: lambda self, e: self._annotate_with_type(
702            e, exp.DataType.Type.BIGINT if e.args.get("big_int") else exp.DataType.Type.INT
703        ),
704        exp.DataType: lambda self, e: self._annotate_with_type(e, e.copy()),
705        exp.DateAdd: lambda self, e: self._annotate_timeunit(e),
706        exp.DateSub: lambda self, e: self._annotate_timeunit(e),
707        exp.DateTrunc: lambda self, e: self._annotate_timeunit(e),
708        exp.Distinct: lambda self, e: self._annotate_by_args(e, "expressions"),
709        exp.Div: lambda self, e: self._annotate_div(e),
710        exp.Dot: lambda self, e: self._annotate_dot(e),
711        exp.Explode: lambda self, e: self._annotate_explode(e),
712        exp.Extract: lambda self, e: self._annotate_extract(e),
713        exp.Filter: lambda self, e: self._annotate_by_args(e, "this"),
714        exp.GenerateDateArray: lambda self, e: self._annotate_with_type(
715            e, exp.DataType.build("ARRAY<DATE>")
716        ),
717        exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
718            e, exp.DataType.build("ARRAY<TIMESTAMP>")
719        ),
720        exp.Greatest: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
721        exp.If: lambda self, e: self._annotate_by_args(e, "true", "false"),
722        exp.Interval: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.INTERVAL),
723        exp.Least: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
724        exp.Literal: lambda self, e: self._annotate_literal(e),
725        exp.Map: lambda self, e: self._annotate_map(e),
726        exp.Max: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
727        exp.Min: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
728        exp.Null: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.NULL),
729        exp.Nullif: lambda self, e: self._annotate_by_args(e, "this", "expression"),
730        exp.PropertyEQ: lambda self, e: self._annotate_by_args(e, "expression"),
731        exp.Slice: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.UNKNOWN),
732        exp.Struct: lambda self, e: self._annotate_struct(e),
733        exp.Sum: lambda self, e: self._annotate_by_args(e, "this", "expressions", promote=True),
734        exp.Timestamp: lambda self, e: self._annotate_with_type(
735            e,
736            exp.DataType.Type.TIMESTAMPTZ if e.args.get("with_tz") else exp.DataType.Type.TIMESTAMP,
737        ),
738        exp.ToMap: lambda self, e: self._annotate_to_map(e),
739        exp.TryCast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
740        exp.Unnest: lambda self, e: self._annotate_unnest(e),
741        exp.VarMap: lambda self, e: self._annotate_map(e),
742    }
743
744    @classmethod
745    def get_or_raise(cls, dialect: DialectType) -> Dialect:
746        """
747        Look up a dialect in the global dialect registry and return it if it exists.
748
749        Args:
750            dialect: The target dialect. If this is a string, it can be optionally followed by
751                additional key-value pairs that are separated by commas and are used to specify
752                dialect settings, such as whether the dialect's identifiers are case-sensitive.
753
754        Example:
755            >>> dialect = Dialect.get_or_raise("duckdb")
756            >>> dialect = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
757
758        Returns:
759            The corresponding Dialect instance.
760        """
761
762        if not dialect:
763            return cls()
764        if isinstance(dialect, _Dialect):
765            return dialect()
766        if isinstance(dialect, Dialect):
767            return dialect
768        if isinstance(dialect, str):
769            try:
770                dialect_name, *kv_strings = dialect.split(",")
771                kv_pairs = (kv.split("=") for kv in kv_strings)
772                kwargs = {}
773                for pair in kv_pairs:
774                    key = pair[0].strip()
775                    value: t.Union[bool, str, None] = None
776
777                    if len(pair) == 1:
778                        # Default initialize standalone settings to True
779                        value = True
780                    elif len(pair) == 2:
781                        value = pair[1].strip()
782
783                    kwargs[key] = to_bool(value)
784
785            except ValueError:
786                raise ValueError(
787                    f"Invalid dialect format: '{dialect}'. "
788                    "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'."
789                )
790
791            result = cls.get(dialect_name.strip())
792            if not result:
793                from difflib import get_close_matches
794
795                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
796                if similar:
797                    similar = f" Did you mean {similar}?"
798
799                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
800
801            return result(**kwargs)
802
803        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")
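# A minimal sketch of the settings-string parsing above: standalone keys default to
# True, and keys other than normalization_strategy are kept in `settings`:
#
#     Dialect.get_or_raise("mysql, case_sensitive").settings
#     # -> {'case_sensitive': True}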
804
805    @classmethod
806    def format_time(
807        cls, expression: t.Optional[str | exp.Expression]
808    ) -> t.Optional[exp.Expression]:
809        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
810        if isinstance(expression, str):
811            return exp.Literal.string(
812                # the time formats are quoted
813                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
814            )
815
816        if expression and expression.is_string:
817            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
818
819        return expression
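# A minimal sketch (assumes Hive's TIME_MAPPING covers these tokens):
#
#     from sqlglot.dialects.hive import Hive
#     Hive.format_time("'yyyy-MM-dd'").name  # e.g. -> '%Y-%m-%d'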
820
821    def __init__(self, **kwargs) -> None:
822        normalization_strategy = kwargs.pop("normalization_strategy", None)
823
824        if normalization_strategy is None:
825            self.normalization_strategy = self.NORMALIZATION_STRATEGY
826        else:
827            self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper())
828
829        self.settings = kwargs
830
831    def __eq__(self, other: t.Any) -> bool:
832        # Does not currently take dialect state into account
833        return type(self) == other
834
835    def __hash__(self) -> int:
836        # Does not currently take dialect state into account
837        return hash(type(self))
838
839    def normalize_identifier(self, expression: E) -> E:
840        """
841        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
842
843        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
844        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
845        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
846        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
847
848        There are also dialects like Spark, which are case-insensitive even when quotes are
849        present, and dialects like MySQL, whose resolution rules match those employed by the
850        underlying operating system, for example they may always be case-sensitive in Linux.
851
852        Finally, the normalization behavior of some engines can even be controlled through flags,
853        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
854
855        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
856        that it can analyze queries in the optimizer and successfully capture their semantics.
857        """
858        if (
859            isinstance(expression, exp.Identifier)
860            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
861            and (
862                not expression.quoted
863                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
864            )
865        ):
866            expression.set(
867                "this",
868                (
869                    expression.this.upper()
870                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
871                    else expression.this.lower()
872                ),
873            )
874
875        return expression
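# A minimal sketch of the strategies above (exp.to_identifier is part of sqlglot):
#
#     from sqlglot import exp
#     Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO")).name
#     # -> 'foo' (lowercases unquoted identifiers)
#     Dialect.get_or_raise("snowflake").normalize_identifier(exp.to_identifier("FoO")).name
#     # -> 'FOO' (uppercases unquoted identifiers)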
876
877    def case_sensitive(self, text: str) -> bool:
878        """Checks if text contains any case sensitive characters, based on the dialect's rules."""
879        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
880            return False
881
882        unsafe = (
883            str.islower
884            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
885            else str.isupper
886        )
887        return any(unsafe(char) for char in text)
888
889    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
890        """Checks if text can be identified given an identify option.
891
892        Args:
893            text: The text to check.
894            identify:
895                `"always"` or `True`: Always returns `True`.
896                `"safe"`: Only returns `True` if the identifier is case-insensitive.
897
898        Returns:
899            Whether the given text can be identified.
900        """
901        if identify is True or identify == "always":
902            return True
903
904        if identify == "safe":
905            return not self.case_sensitive(text)
906
907        return False
908
909    def quote_identifier(self, expression: E, identify: bool = True) -> E:
910        """
911        Adds quotes to a given identifier.
912
913        Args:
914            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
915            identify: If set to `False`, the quotes will only be added if the identifier is deemed
916                "unsafe", with respect to its characters and this dialect's normalization strategy.
917        """
918        if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func):
919            name = expression.this
920            expression.set(
921                "quoted",
922                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
923            )
924
925        return expression
926
927    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
928        if isinstance(path, exp.Literal):
929            path_text = path.name
930            if path.is_number:
931                path_text = f"[{path_text}]"
932            try:
933                return parse_json_path(path_text, self)
934            except ParseError as e:
935                if self.STRICT_JSON_PATH_SYNTAX:
936                    logger.warning(f"Invalid JSON path syntax. {str(e)}")
937
938        return path
939
940    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
941        return self.parser(**opts).parse(self.tokenize(sql), sql)
942
943    def parse_into(
944        self, expression_type: exp.IntoType, sql: str, **opts
945    ) -> t.List[t.Optional[exp.Expression]]:
946        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
947
948    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
949        return self.generator(**opts).generate(expression, copy=copy)
950
951    def transpile(self, sql: str, **opts) -> t.List[str]:
952        return [
953            self.generate(expression, copy=False, **opts) if expression else ""
954            for expression in self.parse(sql)
955        ]
956
957    def tokenize(self, sql: str) -> t.List[Token]:
958        return self.tokenizer.tokenize(sql)
959
960    @property
961    def tokenizer(self) -> Tokenizer:
962        return self.tokenizer_class(dialect=self)
963
964    @property
965    def jsonpath_tokenizer(self) -> JSONPathTokenizer:
966        return self.jsonpath_tokenizer_class(dialect=self)
967
968    def parser(self, **opts) -> Parser:
969        return self.parser_class(dialect=self, **opts)
970
971    def generator(self, **opts) -> Generator:
972        return self.generator_class(dialect=self, **opts)
Dialect(**kwargs)
INDEX_OFFSET = 0

The base index offset for arrays.

WEEK_OFFSET = 0

First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.

UNNEST_COLUMN_ONLY = False

Whether UNNEST table aliases are treated as column aliases.

ALIAS_POST_TABLESAMPLE = False

Whether the table alias comes after tablesample.

TABLESAMPLE_SIZE_IS_PERCENT = False

Whether a size in the table sample clause represents percentage.

NORMALIZATION_STRATEGY = <NormalizationStrategy.LOWERCASE: 'LOWERCASE'>

Specifies the strategy according to which identifiers should be normalized.

IDENTIFIERS_CAN_START_WITH_DIGIT = False

Whether an unquoted identifier can start with a digit.

DPIPE_IS_STRING_CONCAT = True

Whether the DPIPE token (||) is a string concatenation operator.

STRICT_STRING_CONCAT = False

Whether CONCAT's arguments must be strings.

SUPPORTS_USER_DEFINED_TYPES = True

Whether user-defined data types are supported.

SUPPORTS_SEMI_ANTI_JOIN = True

Whether SEMI or ANTI joins are supported.

SUPPORTS_COLUMN_JOIN_MARKS = False

Whether the old-style outer join (+) syntax is supported.

COPY_PARAMS_ARE_CSV = True

Whether COPY statement parameters are separated by comma or whitespace

NORMALIZE_FUNCTIONS: bool | str = 'upper'

Determines how function names are going to be normalized.

Possible values:

"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.

PRESERVE_ORIGINAL_NAMES: bool = False

Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs new functions that share an AST node, e.g. JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery.

LOG_BASE_FIRST: Optional[bool] = True

Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG)

NULL_ORDERING = 'nulls_are_small'

Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"

TYPED_DIVISION = False

Whether the behavior of a / b depends on the types of a and b. False means a / b is always float division. True means a / b is integer division if both a and b are integers.

SAFE_DIVISION = False

Whether division by zero throws an error (False) or returns NULL (True).

CONCAT_COALESCE = False

A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.

HEX_LOWERCASE = False

Whether the HEX function returns a lowercase hexadecimal string.

DATE_FORMAT = "'%Y-%m-%d'"
DATEINT_FORMAT = "'%Y%m%d'"
TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'"
TIME_MAPPING: Dict[str, str] = {}

Associates this dialect's time formats with their equivalent Python strftime formats.

FORMAT_MAPPING: Dict[str, str] = {}

Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.

UNESCAPED_SEQUENCES: Dict[str, str] = {}

Mapping of an escaped sequence (the two-character \\n) to its unescaped version (a literal newline).

PSEUDOCOLUMNS: Set[str] = set()

Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.

PREFER_CTE_ALIAS_COLUMN = False

Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.

For example,

    WITH y(c) AS (
        SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;

will be rewritten as

    WITH y(c) AS (
        SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;

FORCE_EARLY_ALIAS_REF_EXPANSION = False

Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).

For example:

    WITH data AS (
        SELECT
            1 AS id,
            2 AS my_id
    )
    SELECT
        id AS my_id
    FROM
        data
    WHERE
        my_id = 1
    GROUP BY
        my_id
    HAVING
        my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the query, except:

  - BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it
    resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
  - Clickhouse, which will forward the alias across the whole query, i.e. it resolves
    to "WHERE id = 1 GROUP BY id HAVING id = 1"

EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = False

Whether alias reference expansion before qualification should only happen for the GROUP BY clause.

SUPPORTS_ORDER_BY_ALL = False

Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks

HAS_DISTINCT_ARRAY_CONSTRUCTORS = False

Whether the ARRAY constructor is context-sensitive, i.e. in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3), as the former is of type INT[] while the latter is SUPER.

SUPPORTS_FIXED_SIZE_ARRAYS = False

Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.

STRICT_JSON_PATH_SYNTAX = True

Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.

ON_CONDITION_EMPTY_BEFORE_ERROR = True

Whether "X ON EMPTY" should come before "X ON ERROR" (for dialects like T-SQL, MySQL, Oracle).

ARRAY_AGG_INCLUDES_NULLS: Optional[bool] = True

Whether ArrayAgg needs to filter NULL values.

PROMOTE_TO_INFERRED_DATETIME_TYPE = False

This flag is used in the optimizer's canonicalize rule and determines whether x will be promoted to the literal's type in x::DATE < '2020-01-01 12:05:03' (i.e., DATETIME). When false, the literal is cast to x's type to match it instead.

SUPPORTS_VALUES_DEFAULT = True

Whether the DEFAULT keyword is supported in the VALUES clause.

NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = False

Whether number literals can include underscores for better readability

REGEXP_EXTRACT_DEFAULT_GROUP = 0

The default value for the capturing group.

SET_OP_DISTINCT_BY_DEFAULT: Dict[Type[sqlglot.expressions.Expression], Optional[bool]] = {<class 'sqlglot.expressions.Except'>: True, <class 'sqlglot.expressions.Intersect'>: True, <class 'sqlglot.expressions.Union'>: True}

Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.

CREATABLE_KIND_MAPPING: dict[str, str] = {}

Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.

tokenizer_class = <class 'sqlglot.tokens.Tokenizer'>
jsonpath_tokenizer_class = <class 'sqlglot.tokens.JSONPathTokenizer'>
parser_class = <class 'sqlglot.parser.Parser'>
generator_class = <class 'sqlglot.generator.Generator'>
TIME_TRIE: Dict = {}
FORMAT_TRIE: Dict = {}
INVERSE_TIME_MAPPING: Dict[str, str] = {}
INVERSE_TIME_TRIE: Dict = {}
INVERSE_FORMAT_MAPPING: Dict[str, str] = {}
INVERSE_FORMAT_TRIE: Dict = {}
INVERSE_CREATABLE_KIND_MAPPING: dict[str, str] = {}
ESCAPED_SEQUENCES: Dict[str, str] = {}
QUOTE_START = "'"
QUOTE_END = "'"
IDENTIFIER_START = '"'
IDENTIFIER_END = '"'
BIT_START: Optional[str] = None
BIT_END: Optional[str] = None
HEX_START: Optional[str] = None
HEX_END: Optional[str] = None
BYTE_START: Optional[str] = None
BYTE_END: Optional[str] = None
UNICODE_START: Optional[str] = None
UNICODE_END: Optional[str] = None
DATE_PART_MAPPING = {'Y': 'YEAR', 'YY': 'YEAR', 'YYY': 'YEAR', 'YYYY': 'YEAR', 'YR': 'YEAR', 'YEARS': 'YEAR', 'YRS': 'YEAR', 'MM': 'MONTH', 'MON': 'MONTH', 'MONS': 'MONTH', 'MONTHS': 'MONTH', 'D': 'DAY', 'DD': 'DAY', 'DAYS': 'DAY', 'DAYOFMONTH': 'DAY', 'DAY OF WEEK': 'DAYOFWEEK', 'WEEKDAY': 'DAYOFWEEK', 'DOW': 'DAYOFWEEK', 'DW': 'DAYOFWEEK', 'WEEKDAY_ISO': 'DAYOFWEEKISO', 'DOW_ISO': 'DAYOFWEEKISO', 'DW_ISO': 'DAYOFWEEKISO', 'DAY OF YEAR': 'DAYOFYEAR', 'DOY': 'DAYOFYEAR', 'DY': 'DAYOFYEAR', 'W': 'WEEK', 'WK': 'WEEK', 'WEEKOFYEAR': 'WEEK', 'WOY': 'WEEK', 'WY': 'WEEK', 'WEEK_ISO': 'WEEKISO', 'WEEKOFYEARISO': 'WEEKISO', 'WEEKOFYEAR_ISO': 'WEEKISO', 'Q': 'QUARTER', 'QTR': 'QUARTER', 'QTRS': 'QUARTER', 'QUARTERS': 'QUARTER', 'H': 'HOUR', 'HH': 'HOUR', 'HR': 'HOUR', 'HOURS': 'HOUR', 'HRS': 'HOUR', 'M': 'MINUTE', 'MI': 'MINUTE', 'MIN': 'MINUTE', 'MINUTES': 'MINUTE', 'MINS': 'MINUTE', 'S': 'SECOND', 'SEC': 'SECOND', 'SECONDS': 'SECOND', 'SECS': 'SECOND', 'MS': 'MILLISECOND', 'MSEC': 'MILLISECOND', 'MSECS': 'MILLISECOND', 'MSECOND': 'MILLISECOND', 'MSECONDS': 'MILLISECOND', 'MILLISEC': 'MILLISECOND', 'MILLISECS': 'MILLISECOND', 'MILLISECON': 'MILLISECOND', 'MILLISECONDS': 'MILLISECOND', 'US': 'MICROSECOND', 'USEC': 'MICROSECOND', 'USECS': 'MICROSECOND', 'MICROSEC': 'MICROSECOND', 'MICROSECS': 'MICROSECOND', 'USECOND': 'MICROSECOND', 'USECONDS': 'MICROSECOND', 'MICROSECONDS': 'MICROSECOND', 'NS': 'NANOSECOND', 'NSEC': 'NANOSECOND', 'NANOSEC': 'NANOSECOND', 'NSECOND': 'NANOSECOND', 'NSECONDS': 'NANOSECOND', 'NANOSECS': 'NANOSECOND', 'EPOCH_SECOND': 'EPOCH', 'EPOCH_SECONDS': 'EPOCH', 'EPOCH_MILLISECONDS': 'EPOCH_MILLISECOND', 'EPOCH_MICROSECONDS': 'EPOCH_MICROSECOND', 'EPOCH_NANOSECONDS': 'EPOCH_NANOSECOND', 'TZH': 'TIMEZONE_HOUR', 'TZM': 'TIMEZONE_MINUTE', 'DEC': 'DECADE', 'DECS': 'DECADE', 'DECADES': 'DECADE', 'MIL': 'MILLENIUM', 'MILS': 'MILLENIUM', 'MILLENIA': 'MILLENIUM', 'C': 'CENTURY', 'CENT': 'CENTURY', 'CENTS': 'CENTURY', 'CENTURIES': 'CENTURY'}
TYPE_TO_EXPRESSIONS: Dict[sqlglot.expressions.DataType.Type, Set[Type[sqlglot.expressions.Expression]]] = {<Type.BIGINT: 'BIGINT'>: {<class 'sqlglot.expressions.ApproxDistinct'>, <class 'sqlglot.expressions.ArraySize'>, <class 'sqlglot.expressions.Length'>}, <Type.BOOLEAN: 'BOOLEAN'>: {<class 'sqlglot.expressions.RegexpLike'>, <class 'sqlglot.expressions.In'>, <class 'sqlglot.expressions.Boolean'>, <class 'sqlglot.expressions.Between'>}, <Type.DATE: 'DATE'>: {<class 'sqlglot.expressions.DateStrToDate'>, <class 'sqlglot.expressions.CurrentDate'>, <class 'sqlglot.expressions.TimeStrToDate'>, <class 'sqlglot.expressions.DiToDate'>, <class 'sqlglot.expressions.DateFromParts'>, <class 'sqlglot.expressions.TsOrDsToDate'>, <class 'sqlglot.expressions.StrToDate'>, <class 'sqlglot.expressions.Date'>}, <Type.DATETIME: 'DATETIME'>: {<class 'sqlglot.expressions.DatetimeAdd'>, <class 'sqlglot.expressions.Datetime'>, <class 'sqlglot.expressions.CurrentDatetime'>, <class 'sqlglot.expressions.DatetimeSub'>}, <Type.DOUBLE: 'DOUBLE'>: {<class 'sqlglot.expressions.StddevSamp'>, <class 'sqlglot.expressions.Avg'>, <class 'sqlglot.expressions.StddevPop'>, <class 'sqlglot.expressions.Stddev'>, <class 'sqlglot.expressions.Pow'>, <class 'sqlglot.expressions.Sqrt'>, <class 'sqlglot.expressions.ApproxQuantile'>, <class 'sqlglot.expressions.SafeDivide'>, <class 'sqlglot.expressions.Quantile'>, <class 'sqlglot.expressions.VariancePop'>, <class 'sqlglot.expressions.Round'>, <class 'sqlglot.expressions.Variance'>, <class 'sqlglot.expressions.Log'>, <class 'sqlglot.expressions.ToDouble'>, <class 'sqlglot.expressions.Ln'>, <class 'sqlglot.expressions.Exp'>}, <Type.INT: 'INT'>: {<class 'sqlglot.expressions.DatetimeDiff'>, <class 'sqlglot.expressions.TimestampDiff'>, <class 'sqlglot.expressions.Levenshtein'>, <class 'sqlglot.expressions.Sign'>, <class 'sqlglot.expressions.TsOrDiToDi'>, <class 'sqlglot.expressions.Ceil'>, <class 'sqlglot.expressions.DateDiff'>, <class 'sqlglot.expressions.TimeDiff'>, <class 'sqlglot.expressions.StrPosition'>, <class 'sqlglot.expressions.DateToDi'>}, <Type.JSON: 'JSON'>: {<class 'sqlglot.expressions.ParseJSON'>}, <Type.TIME: 'TIME'>: {<class 'sqlglot.expressions.Time'>}, <Type.TIMESTAMP: 'TIMESTAMP'>: {<class 'sqlglot.expressions.CurrentTimestamp'>, <class 'sqlglot.expressions.TimestampSub'>, <class 'sqlglot.expressions.TimeStrToTime'>, <class 'sqlglot.expressions.CurrentTime'>, <class 'sqlglot.expressions.TimestampAdd'>, <class 'sqlglot.expressions.UnixToTime'>, <class 'sqlglot.expressions.StrToTime'>, <class 'sqlglot.expressions.TimeAdd'>, <class 'sqlglot.expressions.TimeSub'>}, <Type.TINYINT: 'TINYINT'>: {<class 'sqlglot.expressions.Year'>, <class 'sqlglot.expressions.Quarter'>, <class 'sqlglot.expressions.Week'>, <class 'sqlglot.expressions.Month'>, <class 'sqlglot.expressions.Day'>}, <Type.VARCHAR: 'VARCHAR'>: {<class 'sqlglot.expressions.String'>, <class 'sqlglot.expressions.Trim'>, <class 'sqlglot.expressions.DateToDateStr'>, <class 'sqlglot.expressions.Initcap'>, <class 'sqlglot.expressions.Upper'>, <class 'sqlglot.expressions.Substring'>, <class 'sqlglot.expressions.GroupConcat'>, <class 'sqlglot.expressions.Lower'>, <class 'sqlglot.expressions.UnixToTimeStr'>, <class 'sqlglot.expressions.ArrayConcat'>, <class 'sqlglot.expressions.UnixToStr'>, <class 'sqlglot.expressions.TimeToTimeStr'>, <class 'sqlglot.expressions.TsOrDsToDateStr'>, <class 'sqlglot.expressions.TimeToStr'>, <class 'sqlglot.expressions.ConcatWs'>, <class 'sqlglot.expressions.Concat'>}}
ANNOTATORS: Dict[Type[~E], Callable[[sqlglot.optimizer.annotate_types.TypeAnnotator, ~E], ~E]]

Maps each expression class to the callable the type annotator uses to infer that expression's type. Entries for the classes listed in TYPE_TO_EXPRESSIONS above are built with _annotate_with_type_lambda, so each of those expressions is annotated with its associated DataType.Type. The remaining entries are dialect-level lambdas: unary and binary operators (Add, Sub, Mul, Div, the comparison, bitwise and logical operators, LIKE/GLOB/REGEXP variants, and JSON extraction operators), pass-through nodes such as Alias, Paren and PivotAlias, and structural or polymorphic nodes such as Anonymous, Array, ArrayAgg, Bracket, Case, Cast, TryCast, Coalesce, Count, DataType, DateAdd, DateSub, DateTrunc, Distinct, Explode, Extract, Filter, GenerateDateArray, GenerateTimestampArray, Greatest, If, Interval, Least, Literal, Map, VarMap, ToMap, Max, Min, Null, Nullif, Struct, Sum, Timestamp and Unnest, which dispatch to the annotator's specialized methods.
@classmethod
def get_or_raise( cls, dialect: Union[str, Dialect, Type[Dialect], NoneType]) -> Dialect:
744    @classmethod
745    def get_or_raise(cls, dialect: DialectType) -> Dialect:
746        """
747        Look up a dialect in the global dialect registry and return it if it exists.
748
749        Args:
750            dialect: The target dialect. If this is a string, it can be optionally followed by
751                additional key-value pairs that are separated by commas and are used to specify
752                dialect settings, such as whether the dialect's identifiers are case-sensitive.
753
754        Example:
755            >>> dialect = dialect_class = get_or_raise("duckdb")
756            >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive")
757
758        Returns:
759            The corresponding Dialect instance.
760        """
761
762        if not dialect:
763            return cls()
764        if isinstance(dialect, _Dialect):
765            return dialect()
766        if isinstance(dialect, Dialect):
767            return dialect
768        if isinstance(dialect, str):
769            try:
770                dialect_name, *kv_strings = dialect.split(",")
771                kv_pairs = (kv.split("=") for kv in kv_strings)
772                kwargs = {}
773                for pair in kv_pairs:
774                    key = pair[0].strip()
775                    value: t.Union[bool, str, None] = None
776
777                    if len(pair) == 1:
778                        # Default initialize standalone settings to True
779                        value = True
780                    elif len(pair) == 2:
781                        value = pair[1].strip()
782
783                    kwargs[key] = to_bool(value)
784
785            except ValueError:
786                raise ValueError(
787                    f"Invalid dialect format: '{dialect}'. "
788                    "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'."
789                )
790
791            result = cls.get(dialect_name.strip())
792            if not result:
793                from difflib import get_close_matches
794
795                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
796                if similar:
797                    similar = f" Did you mean {similar}?"
798
799                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
800
801            return result(**kwargs)
802
803        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")

Look up a dialect in the global dialect registry and return it if it exists.

Arguments:
  • dialect: The target dialect. If this is a string, it can be optionally followed by additional key-value pairs that are separated by commas and are used to specify dialect settings, such as whether the dialect's identifiers are case-sensitive.
Example:
  >>> dialect = dialect_class = get_or_raise("duckdb")
  >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive")

Returns:
  The corresponding Dialect instance.
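
A quick usage sketch (illustrative doctest; registered class names follow the registry, e.g. DuckDB for "duckdb"):

  >>> from sqlglot.dialects.dialect import Dialect, NormalizationStrategy
  >>> type(Dialect.get_or_raise("duckdb")).__name__
  'DuckDB'
  >>> d = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
  >>> d.normalization_strategy is NormalizationStrategy.CASE_SENSITIVE
  True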

@classmethod
def format_time( cls, expression: Union[str, sqlglot.expressions.Expression, NoneType]) -> Optional[sqlglot.expressions.Expression]:
805    @classmethod
806    def format_time(
807        cls, expression: t.Optional[str | exp.Expression]
808    ) -> t.Optional[exp.Expression]:
809        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
810        if isinstance(expression, str):
811            return exp.Literal.string(
812                # the time formats are quoted
813                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
814            )
815
816        if expression and expression.is_string:
817            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
818
819        return expression

Converts a time format in this dialect to its equivalent Python strftime format.
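
For example, Snowflake's date format tokens translate into Python strftime tokens (indicative; the exact translation is defined by Snowflake.TIME_MAPPING):

  >>> from sqlglot.dialects.snowflake import Snowflake
  >>> Snowflake.format_time("'YYYY-MM-DD'").sql()
  "'%Y-%m-%d'"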

settings
def normalize_identifier(self, expression: ~E) -> ~E:
839    def normalize_identifier(self, expression: E) -> E:
840        """
841        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
842
843        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
844        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
845        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
846        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
847
848        There are also dialects like Spark, which are case-insensitive even when quotes are
849        present, and dialects like MySQL, whose resolution rules match those employed by the
850        underlying operating system, for example they may always be case-sensitive in Linux.
851
852        Finally, the normalization behavior of some engines can even be controlled through flags,
853        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
854
855        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
856        that it can analyze queries in the optimizer and successfully capture their semantics.
857        """
858        if (
859            isinstance(expression, exp.Identifier)
860            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
861            and (
862                not expression.quoted
863                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
864            )
865        ):
866            expression.set(
867                "this",
868                (
869                    expression.this.upper()
870                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
871                    else expression.this.lower()
872                ),
873            )
874
875        return expression

Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
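A short illustration of the behaviors described above (indicative doctest):

  >>> from sqlglot import exp
  >>> from sqlglot.dialects.dialect import Dialect
  >>> Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO")).name
  'foo'
  >>> Dialect.get_or_raise("snowflake").normalize_identifier(exp.to_identifier("FoO")).name
  'FOO'
  >>> # quoted identifiers are left untouched by case-preserving dialects
  >>> Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO", quoted=True)).name
  'FoO'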

def case_sensitive(self, text: str) -> bool:
877    def case_sensitive(self, text: str) -> bool:
878        """Checks if text contains any case sensitive characters, based on the dialect's rules."""
879        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
880            return False
881
882        unsafe = (
883            str.islower
884            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
885            else str.isupper
886        )
887        return any(unsafe(char) for char in text)

Checks if text contains any case sensitive characters, based on the dialect's rules.
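Under a lowercasing strategy any uppercase character makes the text "unsafe", and vice versa (illustrative):

  >>> from sqlglot.dialects.dialect import Dialect
  >>> Dialect.get_or_raise("duckdb").case_sensitive("FoO")     # lowercases unquoted identifiers
  True
  >>> Dialect.get_or_raise("snowflake").case_sensitive("FOO")  # uppercases them
  False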

def can_identify(self, text: str, identify: str | bool = 'safe') -> bool:
889    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
890        """Checks if text can be identified given an identify option.
891
892        Args:
893            text: The text to check.
894            identify:
895                `"always"` or `True`: Always returns `True`.
896                `"safe"`: Only returns `True` if the identifier is case-insensitive.
897
898        Returns:
899            Whether the given text can be identified.
900        """
901        if identify is True or identify == "always":
902            return True
903
904        if identify == "safe":
905            return not self.case_sensitive(text)
906
907        return False

Checks if text can be identified given an identify option.

Arguments:
  • text: The text to check.
  • identify: "always" or True: Always returns True. "safe": Only returns True if the identifier is case-insensitive.
Returns:
  Whether the given text can be identified.
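
A minimal sketch of the two modes, assuming the default lowercase-normalizing DuckDB dialect:

  >>> from sqlglot.dialects.dialect import Dialect
  >>> d = Dialect.get_or_raise("duckdb")
  >>> d.can_identify("foo", "safe"), d.can_identify("FoO", "safe")
  (True, False)
  >>> d.can_identify("FoO", "always")
  True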

def quote_identifier(self, expression: ~E, identify: bool = True) -> ~E:
909    def quote_identifier(self, expression: E, identify: bool = True) -> E:
910        """
911        Adds quotes to a given identifier.
912
913        Args:
914            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
915            identify: If set to `False`, the quotes will only be added if the identifier is deemed
916                "unsafe", with respect to its characters and this dialect's normalization strategy.
917        """
918        if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func):
919            name = expression.this
920            expression.set(
921                "quoted",
922                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
923            )
924
925        return expression

Adds quotes to a given identifier.

Arguments:
  • expression: The expression of interest. If it's not an Identifier, this method is a no-op.
  • identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
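
For example (indicative; the generated SQL uses the dialect's quote characters):

  >>> from sqlglot import exp
  >>> from sqlglot.dialects.dialect import Dialect
  >>> Dialect.get_or_raise("postgres").quote_identifier(exp.to_identifier("order")).sql(dialect="postgres")
  '"order"'
  >>> # identify=False only quotes identifiers deemed "unsafe"
  >>> Dialect.get_or_raise("postgres").quote_identifier(exp.to_identifier("foo"), identify=False).sql(dialect="postgres")
  'foo'
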
def to_json_path( self, path: Optional[sqlglot.expressions.Expression]) -> Optional[sqlglot.expressions.Expression]:
927    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
928        if isinstance(path, exp.Literal):
929            path_text = path.name
930            if path.is_number:
931                path_text = f"[{path_text}]"
932            try:
933                return parse_json_path(path_text, self)
934            except ParseError as e:
935                if self.STRICT_JSON_PATH_SYNTAX:
936                    logger.warning(f"Invalid JSON path syntax. {str(e)}")
937
938        return path
def parse(self, sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
940    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
941        return self.parser(**opts).parse(self.tokenize(sql), sql)
def parse_into( self, expression_type: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
943    def parse_into(
944        self, expression_type: exp.IntoType, sql: str, **opts
945    ) -> t.List[t.Optional[exp.Expression]]:
946        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
def generate( self, expression: sqlglot.expressions.Expression, copy: bool = True, **opts) -> str:
948    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
949        return self.generator(**opts).generate(expression, copy=copy)
def transpile(self, sql: str, **opts) -> List[str]:
951    def transpile(self, sql: str, **opts) -> t.List[str]:
952        return [
953            self.generate(expression, copy=False, **opts) if expression else ""
954            for expression in self.parse(sql)
955        ]
def tokenize(self, sql: str) -> List[sqlglot.tokens.Token]:
957    def tokenize(self, sql: str) -> t.List[Token]:
958        return self.tokenizer.tokenize(sql)
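These entry points compose: transpile is parse followed by generate, producing one SQL string per statement (illustrative):

  >>> from sqlglot.dialects.dialect import Dialect
  >>> d = Dialect.get_or_raise("duckdb")
  >>> len(d.parse("SELECT 1; SELECT 2"))
  2
  >>> d.transpile("SELECT 1 AS x")
  ['SELECT 1 AS x']
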
tokenizer: sqlglot.tokens.Tokenizer
960    @property
961    def tokenizer(self) -> Tokenizer:
962        return self.tokenizer_class(dialect=self)
jsonpath_tokenizer: sqlglot.jsonpath.JSONPathTokenizer
964    @property
965    def jsonpath_tokenizer(self) -> JSONPathTokenizer:
966        return self.jsonpath_tokenizer_class(dialect=self)
def parser(self, **opts) -> sqlglot.parser.Parser:
968    def parser(self, **opts) -> Parser:
969        return self.parser_class(dialect=self, **opts)
def generator(self, **opts) -> sqlglot.generator.Generator:
971    def generator(self, **opts) -> Generator:
972        return self.generator_class(dialect=self, **opts)
DialectType = typing.Union[str, Dialect, typing.Type[Dialect], NoneType]
def rename_func( name: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]:
978def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
979    return lambda self, expression: self.func(name, *flatten(expression.args.values()))
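The typical use is inside a dialect's Generator.TRANSFORMS mapping; the dialect and target function name below are hypothetical, but the nested-Generator pattern matches how sqlglot dialects are defined:

  from sqlglot import exp, generator
  from sqlglot.dialects.dialect import Dialect, rename_func

  class MyDialect(Dialect):  # hypothetical dialect, for illustration only
      class Generator(generator.Generator):
          TRANSFORMS = {
              **generator.Generator.TRANSFORMS,
              # render exp.Levenshtein as EDITDIST3(...) in this dialect
              exp.Levenshtein: rename_func("EDITDIST3"),
          }
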
@unsupported_args('accuracy')
def approx_count_distinct_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ApproxDistinct) -> str:
982@unsupported_args("accuracy")
983def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
984    return self.func("APPROX_COUNT_DISTINCT", expression.this)
def if_sql( name: str = 'IF', false_value: Union[str, sqlglot.expressions.Expression, NoneType] = None) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.If], str]:
987def if_sql(
988    name: str = "IF", false_value: t.Optional[exp.Expression | str] = None
989) -> t.Callable[[Generator, exp.If], str]:
990    def _if_sql(self: Generator, expression: exp.If) -> str:
991        return self.func(
992            name,
993            expression.this,
994            expression.args.get("true"),
995            expression.args.get("false") or false_value,
996        )
997
998    return _if_sql
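In the same spirit, a dialect whose conditional is spelled differently can register an if_sql transform (hypothetical names):

  from sqlglot import exp, generator
  from sqlglot.dialects.dialect import Dialect, if_sql

  class IifDialect(Dialect):  # hypothetical dialect, for illustration only
      class Generator(generator.Generator):
          TRANSFORMS = {
              **generator.Generator.TRANSFORMS,
              # render exp.If as IIF(cond, true, false), defaulting the false branch to NULL
              exp.If: if_sql(name="IIF", false_value="NULL"),
          }
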
def arrow_json_extract_sql( self: sqlglot.generator.Generator, expression: Union[sqlglot.expressions.JSONExtract, sqlglot.expressions.JSONExtractScalar]) -> str:
1001def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
1002    this = expression.this
1003    if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string:
1004        this.replace(exp.cast(this, exp.DataType.Type.JSON))
1005
1006    return self.binary(expression, "->" if isinstance(expression, exp.JSONExtract) else "->>")
def inline_array_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Array) -> str:
1009def inline_array_sql(self: Generator, expression: exp.Array) -> str:
1010    return f"[{self.expressions(expression, dynamic=True, new_line=True, skip_first=True, skip_last=True)}]"
def inline_array_unless_query( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Array) -> str:
1013def inline_array_unless_query(self: Generator, expression: exp.Array) -> str:
1014    elem = seq_get(expression.expressions, 0)
1015    if isinstance(elem, exp.Expression) and elem.find(exp.Query):
1016        return self.func("ARRAY", elem)
1017    return inline_array_sql(self, expression)
def no_ilike_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ILike) -> str:
1020def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
1021    return self.like_sql(
1022        exp.Like(
1023            this=exp.Lower(this=expression.this), expression=exp.Lower(this=expression.expression)
1024        )
1025    )
def no_paren_current_date_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CurrentDate) -> str:
1028def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
1029    zone = self.sql(expression, "this")
1030    return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE"
def no_recursive_cte_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.With) -> str:
1033def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
1034    if expression.args.get("recursive"):
1035        self.unsupported("Recursive CTEs are unsupported")
1036        expression.args["recursive"] = False
1037    return self.with_sql(expression)
def no_safe_divide_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.SafeDivide, if_sql: str = 'IF') -> str:
1040def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide, if_sql: str = "IF") -> str:
1041    n = self.sql(expression, "this")
1042    d = self.sql(expression, "expression")
1043    return f"{if_sql}(({d}) <> 0, ({n}) / ({d}), NULL)"
def no_tablesample_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TableSample) -> str:
1046def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
1047    self.unsupported("TABLESAMPLE unsupported")
1048    return self.sql(expression.this)
def no_pivot_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Pivot) -> str:
1051def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
1052    self.unsupported("PIVOT unsupported")
1053    return ""
def no_trycast_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TryCast) -> str:
1056def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
1057    return self.cast_sql(expression)
def no_comment_column_constraint_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CommentColumnConstraint) -> str:
1060def no_comment_column_constraint_sql(
1061    self: Generator, expression: exp.CommentColumnConstraint
1062) -> str:
1063    self.unsupported("CommentColumnConstraint unsupported")
1064    return ""
def no_map_from_entries_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.MapFromEntries) -> str:
1067def no_map_from_entries_sql(self: Generator, expression: exp.MapFromEntries) -> str:
1068    self.unsupported("MAP_FROM_ENTRIES unsupported")
1069    return ""
def property_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Property) -> str:
1072def property_sql(self: Generator, expression: exp.Property) -> str:
1073    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"
def str_position_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition, generate_instance: bool = False, str_position_func_name: str = 'STRPOS') -> str:
1076def str_position_sql(
1077    self: Generator,
1078    expression: exp.StrPosition,
1079    generate_instance: bool = False,
1080    str_position_func_name: str = "STRPOS",
1081) -> str:
1082    this = self.sql(expression, "this")
1083    substr = self.sql(expression, "substr")
1084    position = self.sql(expression, "position")
1085    instance = expression.args.get("instance") if generate_instance else None
1086    position_offset = ""
1087
1088    if position:
1089        # Normalize third 'pos' argument into 'SUBSTR(..) + offset' across dialects
1090        this = self.func("SUBSTR", this, position)
1091        position_offset = f" + {position} - 1"
1092
1093    return self.func(str_position_func_name, this, substr, instance) + position_offset
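For instance, a position argument is normalized into a SUBSTR plus an offset (output indicative of this version of the helper):

  >>> import sqlglot
  >>> sqlglot.transpile("SELECT LOCATE('a', col, 3)", read="mysql", write="postgres")[0]
  "SELECT STRPOS(SUBSTR(col, 3), 'a') + 3 - 1"
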
def struct_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StructExtract) -> str:
1096def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
1097    return (
1098        f"{self.sql(expression, 'this')}.{self.sql(exp.to_identifier(expression.expression.name))}"
1099    )
def var_map_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Map | sqlglot.expressions.VarMap, map_func_name: str = 'MAP') -> str:
1102def var_map_sql(
1103    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
1104) -> str:
1105    keys = expression.args["keys"]
1106    values = expression.args["values"]
1107
1108    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
1109        self.unsupported("Cannot convert array columns into map.")
1110        return self.func(map_func_name, keys, values)
1111
1112    args = []
1113    for key, value in zip(keys.expressions, values.expressions):
1114        args.append(self.sql(key))
1115        args.append(self.sql(value))
1116
1117    return self.func(map_func_name, *args)
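When both arguments are array literals, the key/value pairs are interleaved into a flat argument list (indicative rendering):

  # Map(keys=ARRAY['a', 'b'], values=ARRAY[1, 2])
  #   -> MAP('a', 1, 'b', 2)
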
def build_formatted_time( exp_class: Type[~E], dialect: str, default: Union[str, bool, NoneType] = None) -> Callable[[List], ~E]:
1120def build_formatted_time(
1121    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
1122) -> t.Callable[[t.List], E]:
1123    """Helper used for time expressions.
1124
1125    Args:
1126        exp_class: the expression class to instantiate.
1127        dialect: target sql dialect.
1128        default: the default format, True being time.
1129
1130    Returns:
1131        A callable that can be used to return the appropriately formatted time expression.
1132    """
1133
1134    def _builder(args: t.List):
1135        return exp_class(
1136            this=seq_get(args, 0),
1137            format=Dialect[dialect].format_time(
1138                seq_get(args, 1)
1139                or (Dialect[dialect].TIME_FORMAT if default is True else default or None)
1140            ),
1141        )
1142
1143    return _builder

Helper used for time expressions.

Arguments:
  • exp_class: the expression class to instantiate.
  • dialect: target sql dialect.
  • default: the default format, True being time.
Returns:
  A callable that can be used to return the appropriately formatted time expression.
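
A usage sketch (a minimal example, assuming MySQL-style format strings; the builder wiring shown is illustrative):

  from sqlglot import exp
  from sqlglot.dialects.dialect import build_formatted_time

  # Builder that parses args into exp.StrToDate, translating the second
  # argument through MySQL's TIME_MAPPING into Python strftime format.
  build_str_to_date = build_formatted_time(exp.StrToDate, "mysql")
  node = build_str_to_date([exp.column("s"), exp.Literal.string("%d-%m-%Y")])
  # node.args["format"] now holds the translated format literal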

def time_format( dialect: Union[str, Dialect, Type[Dialect], NoneType] = None) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.UnixToStr | sqlglot.expressions.StrToUnix], Optional[str]]:
1146def time_format(
1147    dialect: DialectType = None,
1148) -> t.Callable[[Generator, exp.UnixToStr | exp.StrToUnix], t.Optional[str]]:
1149    def _time_format(self: Generator, expression: exp.UnixToStr | exp.StrToUnix) -> t.Optional[str]:
1150        """
1151        Returns the time format for a given expression, unless it's equivalent
1152        to the default time format of the dialect of interest.
1153        """
1154        time_format = self.format_time(expression)
1155        return time_format if time_format != Dialect.get_or_raise(dialect).TIME_FORMAT else None
1156
1157    return _time_format
def build_date_delta( exp_class: Type[~E], unit_mapping: Optional[Dict[str, str]] = None, default_unit: Optional[str] = 'DAY') -> Callable[[List], ~E]:
1160def build_date_delta(
1161    exp_class: t.Type[E],
1162    unit_mapping: t.Optional[t.Dict[str, str]] = None,
1163    default_unit: t.Optional[str] = "DAY",
1164) -> t.Callable[[t.List], E]:
1165    def _builder(args: t.List) -> E:
1166        unit_based = len(args) == 3
1167        this = args[2] if unit_based else seq_get(args, 0)
1168        unit = None
1169        if unit_based or default_unit:
1170            unit = args[0] if unit_based else exp.Literal.string(default_unit)
1171            unit = exp.var(unit_mapping.get(unit.name.lower(), unit.name)) if unit_mapping else unit
1172        return exp_class(this=this, expression=seq_get(args, 1), unit=unit)
1173
1174    return _builder
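A sketch of the builder's two arities (unit keys are matched lower-cased, per the source above; the mapping shown is illustrative):

  from sqlglot import exp
  from sqlglot.dialects.dialect import build_date_delta

  builder = build_date_delta(exp.DateAdd, unit_mapping={"dd": "DAY"})
  # 3-arg form: (unit, increment, this) -- "DD" is mapped to "DAY"
  node = builder([exp.var("DD"), exp.Literal.number(1), exp.column("dt")])
  # 2-arg form falls back to default_unit ("DAY")
  node2 = builder([exp.column("dt"), exp.Literal.number(1)])
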
def build_date_delta_with_interval(expression_class: Type[~E]) -> Callable[[List], Optional[~E]]:
1177def build_date_delta_with_interval(
1178    expression_class: t.Type[E],
1179) -> t.Callable[[t.List], t.Optional[E]]:
1180    def _builder(args: t.List) -> t.Optional[E]:
1181        if len(args) < 2:
1182            return None
1183
1184        interval = args[1]
1185
1186        if not isinstance(interval, exp.Interval):
1187            raise ParseError(f"INTERVAL expression expected but got '{interval}'")
1188
1189        return expression_class(this=args[0], expression=interval.this, unit=unit_to_str(interval))
1190
1191    return _builder
def date_trunc_to_time( args: List) -> sqlglot.expressions.DateTrunc | sqlglot.expressions.TimestampTrunc:
1194def date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
1195    unit = seq_get(args, 0)
1196    this = seq_get(args, 1)
1197
1198    if isinstance(this, exp.Cast) and this.is_type("date"):
1199        return exp.DateTrunc(unit=unit, this=this)
1200    return exp.TimestampTrunc(this=this, unit=unit)
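That is, a cast-to-DATE second argument yields a DateTrunc and everything else a TimestampTrunc (minimal sketch):

  >>> from sqlglot import exp
  >>> from sqlglot.dialects.dialect import date_trunc_to_time
  >>> args = [exp.Literal.string("month"), exp.cast(exp.column("x"), "date")]
  >>> type(date_trunc_to_time(args)).__name__
  'DateTrunc'
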
def date_add_interval_sql( data_type: str, kind: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]:
1203def date_add_interval_sql(
1204    data_type: str, kind: str
1205) -> t.Callable[[Generator, exp.Expression], str]:
1206    def func(self: Generator, expression: exp.Expression) -> str:
1207        this = self.sql(expression, "this")
1208        interval = exp.Interval(this=expression.expression, unit=unit_to_var(expression))
1209        return f"{data_type}_{kind}({this}, {self.sql(interval)})"
1210
1211    return func
def timestamptrunc_sql( zone: bool = False) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.TimestampTrunc], str]:
1214def timestamptrunc_sql(zone: bool = False) -> t.Callable[[Generator, exp.TimestampTrunc], str]:
1215    def _timestamptrunc_sql(self: Generator, expression: exp.TimestampTrunc) -> str:
1216        args = [unit_to_str(expression), expression.this]
1217        if zone:
1218            args.append(expression.args.get("zone"))
1219        return self.func("DATE_TRUNC", *args)
1220
1221    return _timestamptrunc_sql
def no_timestamp_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Timestamp) -> str:
1224def no_timestamp_sql(self: Generator, expression: exp.Timestamp) -> str:
1225    zone = expression.args.get("zone")
1226    if not zone:
1227        from sqlglot.optimizer.annotate_types import annotate_types
1228
1229        target_type = annotate_types(expression).type or exp.DataType.Type.TIMESTAMP
1230        return self.sql(exp.cast(expression.this, target_type))
1231    if zone.name.lower() in TIMEZONES:
1232        return self.sql(
1233            exp.AtTimeZone(
1234                this=exp.cast(expression.this, exp.DataType.Type.TIMESTAMP),
1235                zone=zone,
1236            )
1237        )
1238    return self.func("TIMESTAMP", expression.this, zone)
def no_time_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Time) -> str:
1241def no_time_sql(self: Generator, expression: exp.Time) -> str:
1242    # Transpile BQ's TIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIME)
1243    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)
1244    expr = exp.cast(
1245        exp.AtTimeZone(this=this, zone=expression.args.get("zone")), exp.DataType.Type.TIME
1246    )
1247    return self.sql(expr)
def no_datetime_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Datetime) -> str:
1250def no_datetime_sql(self: Generator, expression: exp.Datetime) -> str:
1251    this = expression.this
1252    expr = expression.expression
1253
1254    if expr.name.lower() in TIMEZONES:
1255        # Transpile BQ's DATETIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIMESTAMP)
1256        this = exp.cast(this, exp.DataType.Type.TIMESTAMPTZ)
1257        this = exp.cast(exp.AtTimeZone(this=this, zone=expr), exp.DataType.Type.TIMESTAMP)
1258        return self.sql(this)
1259
1260    this = exp.cast(this, exp.DataType.Type.DATE)
1261    expr = exp.cast(expr, exp.DataType.Type.TIME)
1262
1263    return self.sql(exp.cast(exp.Add(this=this, expression=expr), exp.DataType.Type.TIMESTAMP))
def locate_to_strposition(args: List) -> sqlglot.expressions.Expression:
1266def locate_to_strposition(args: t.List) -> exp.Expression:
1267    return exp.StrPosition(
1268        this=seq_get(args, 1), substr=seq_get(args, 0), position=seq_get(args, 2)
1269    )
def strposition_to_locate_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition) -> str:
1272def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
1273    return self.func(
1274        "LOCATE", expression.args.get("substr"), expression.this, expression.args.get("position")
1275    )
def left_to_substring_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Left) -> str:
1278def left_to_substring_sql(self: Generator, expression: exp.Left) -> str:
1279    return self.sql(
1280        exp.Substring(
1281            this=expression.this, start=exp.Literal.number(1), length=expression.expression
1282        )
1283    )
def right_to_substring_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Right) -> str:
1286def right_to_substring_sql(self: Generator, expression: exp.Right) -> str:
1287    return self.sql(
1288        exp.Substring(
1289            this=expression.this,
1290            start=exp.Length(this=expression.this) - exp.paren(expression.expression - 1),
1291        )
1292    )
def timestrtotime_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TimeStrToTime, include_precision: bool = False) -> str:
1295def timestrtotime_sql(
1296    self: Generator,
1297    expression: exp.TimeStrToTime,
1298    include_precision: bool = False,
1299) -> str:
1300    datatype = exp.DataType.build(
1301        exp.DataType.Type.TIMESTAMPTZ
1302        if expression.args.get("zone")
1303        else exp.DataType.Type.TIMESTAMP
1304    )
1305
1306    if isinstance(expression.this, exp.Literal) and include_precision:
1307        precision = subsecond_precision(expression.this.name)
1308        if precision > 0:
1309            datatype = exp.DataType.build(
1310                datatype.this, expressions=[exp.DataTypeParam(this=exp.Literal.number(precision))]
1311            )
1312
1313    return self.sql(exp.cast(expression.this, datatype, dialect=self.dialect))
def datestrtodate_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.DateStrToDate) -> str:
1316def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
1317    return self.sql(exp.cast(expression.this, exp.DataType.Type.DATE))
def encode_decode_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Expression, name: str, replace: bool = True) -> str:
1321def encode_decode_sql(
1322    self: Generator, expression: exp.Expression, name: str, replace: bool = True
1323) -> str:
1324    charset = expression.args.get("charset")
1325    if charset and charset.name.lower() != "utf-8":
1326        self.unsupported(f"Expected utf-8 character set, got {charset}.")
1327
1328    return self.func(name, expression.this, expression.args.get("replace") if replace else None)
def min_or_least( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Min) -> str:
1331def min_or_least(self: Generator, expression: exp.Min) -> str:
1332    name = "LEAST" if expression.expressions else "MIN"
1333    return rename_func(name)(self, expression)
def max_or_greatest( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Max) -> str:
1336def max_or_greatest(self: Generator, expression: exp.Max) -> str:
1337    name = "GREATEST" if expression.expressions else "MAX"
1338    return rename_func(name)(self, expression)
def count_if_to_sum( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CountIf) -> str:
1341def count_if_to_sum(self: Generator, expression: exp.CountIf) -> str:
1342    cond = expression.this
1343
1344    if isinstance(expression.this, exp.Distinct):
1345        cond = expression.this.expressions[0]
1346        self.unsupported("DISTINCT is not supported when converting COUNT_IF to SUM")
1347
1348    return self.func("sum", exp.func("if", cond, 1, 0))
def trim_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Trim) -> str:
1351def trim_sql(self: Generator, expression: exp.Trim) -> str:
1352    target = self.sql(expression, "this")
1353    trim_type = self.sql(expression, "position")
1354    remove_chars = self.sql(expression, "expression")
1355    collation = self.sql(expression, "collation")
1356
1357    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific
1358    if not remove_chars:
1359        return self.trim_sql(expression)
1360
1361    trim_type = f"{trim_type} " if trim_type else ""
1362    remove_chars = f"{remove_chars} " if remove_chars else ""
1363    from_part = "FROM " if trim_type or remove_chars else ""
1364    collation = f" COLLATE {collation}" if collation else ""
1365    return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"
def str_to_time_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Expression) -> str:
1368def str_to_time_sql(self: Generator, expression: exp.Expression) -> str:
1369    return self.func("STRPTIME", expression.this, self.format_time(expression))
def concat_to_dpipe_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Concat) -> str:
1372def concat_to_dpipe_sql(self: Generator, expression: exp.Concat) -> str:
1373    return self.sql(reduce(lambda x, y: exp.DPipe(this=x, expression=y), expression.expressions))
def concat_ws_to_dpipe_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ConcatWs) -> str:
1376def concat_ws_to_dpipe_sql(self: Generator, expression: exp.ConcatWs) -> str:
1377    delim, *rest_args = expression.expressions
1378    return self.sql(
1379        reduce(
1380            lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)),
1381            rest_args,
1382        )
1383    )
@unsupported_args('position', 'occurrence', 'parameters')
def regexp_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.RegexpExtract | sqlglot.expressions.RegexpExtractAll) -> str:
1386@unsupported_args("position", "occurrence", "parameters")
1387def regexp_extract_sql(
1388    self: Generator, expression: exp.RegexpExtract | exp.RegexpExtractAll
1389) -> str:
1390    group = expression.args.get("group")
1391
1392    # Do not render group if it's the default value for this dialect
1393    if group and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP):
1394        group = None
1395
1396    return self.func(expression.sql_name(), expression.this, expression.expression, group)
@unsupported_args('position', 'occurrence', 'modifiers')
def regexp_replace_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.RegexpReplace) -> str:
1399@unsupported_args("position", "occurrence", "modifiers")
1400def regexp_replace_sql(self: Generator, expression: exp.RegexpReplace) -> str:
1401    return self.func(
1402        "REGEXP_REPLACE", expression.this, expression.expression, expression.args["replacement"]
1403    )
def pivot_column_names( aggregations: List[sqlglot.expressions.Expression], dialect: Union[str, Dialect, Type[Dialect], NoneType]) -> List[str]: