# sqlglot.generators.duckdb

   1from __future__ import annotations
   2
   3from decimal import Decimal
   4from itertools import groupby
   5import re
   6import typing as t
   7
   8from sqlglot import exp, generator, transforms
   9
  10from sqlglot.dialects.dialect import (
  11    DATETIME_DELTA,
  12    JSON_EXTRACT_TYPE,
  13    approx_count_distinct_sql,
  14    array_append_sql,
  15    array_compact_sql,
  16    array_concat_sql,
  17    arrow_json_extract_sql,
  18    count_if_to_sum,
  19    date_delta_to_binary_interval_op,
  20    datestrtodate_sql,
  21    encode_decode_sql,
  22    explode_to_unnest_sql,
  23    generate_series_sql,
  24    getbit_sql,
  25    groupconcat_sql,
  26    inline_array_unless_query,
  27    months_between_sql,
  28    no_datetime_sql,
  29    no_comment_column_constraint_sql,
  30    no_make_interval_sql,
  31    no_time_sql,
  32    no_timestamp_sql,
  33    rename_func,
  34    remove_from_array_using_filter,
  35    strposition_sql,
  36    str_to_time_sql,
  37    timestrtotime_sql,
  38    unit_to_str,
  39)
  40from sqlglot.generator import unsupported_args
  41from sqlglot.helper import is_date_unit, seq_get
  42from builtins import type as Type
  43
# Regex to detect time zones in timestamps of the form [+|-]TT[:tt]
# The pattern matches timezone offsets that appear after the time portion
TIMEZONE_PATTERN = re.compile(r":\d{2}.*?[+\-]\d{2}(?::\d{2})?")

# Characters that must be escaped when building regex expressions in INITCAP
REGEX_ESCAPE_REPLACEMENTS = {
    "\\": "\\\\",
    "-": r"\-",
    "^": r"\^",
    "[": r"\[",
    "]": r"\]",
}

# Used in RANDSTR transpilation: fixed character pool and seed keep output deterministic
RANDSTR_CHAR_POOL = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
RANDSTR_SEED = 123456

# Whitespace control characters that DuckDB must process with `CHR({val})` calls
WS_CONTROL_CHARS_TO_DUCK = {
    "\u000b": 11,
    "\u001c": 28,
    "\u001d": 29,
    "\u001e": 30,
    "\u001f": 31,
}

# Days of week to ISO 8601 day-of-week numbers
# ISO 8601 standard: Monday=1, Tuesday=2, Wednesday=3, Thursday=4, Friday=5, Saturday=6, Sunday=7
WEEK_START_DAY_TO_DOW = {
    "MONDAY": 1,
    "TUESDAY": 2,
    "WEDNESDAY": 3,
    "THURSDAY": 4,
    "FRIDAY": 5,
    "SATURDAY": 6,
    "SUNDAY": 7,
}

# NOTE(review): presumably the maximum bit position used when transpiling bit
# functions (e.g. GETBIT) — confirm at the use site; shared Literal node
MAX_BIT_POSITION = exp.Literal.number(32768)

# cs/as/ps are Snowflake defaults; DuckDB already behaves the same way, so they are safe to drop.
# Note: "as" is also a reserved keyword in DuckDB, making it impossible to pass through.
_SNOWFLAKE_COLLATION_DEFAULTS = frozenset({"cs", "as", "ps"})
_SNOWFLAKE_COLLATION_UNSUPPORTED = frozenset(
    {"ci", "ai", "upper", "lower", "utf8", "bin", "pi", "fl", "fu", "trim", "ltrim", "rtrim"}
)

# Window functions that support IGNORE/RESPECT NULLS in DuckDB
_IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
    exp.FirstValue,
    exp.Lag,
    exp.LastValue,
    exp.Lead,
    exp.NthValue,
)

# SEQ function constants
_SEQ_BASE: exp.Expr = exp.maybe_parse("(ROW_NUMBER() OVER (ORDER BY 1) - 1)")
# Contexts where the SEQ window-function rewrite is flagged as unsupported
_SEQ_RESTRICTED = (exp.Where, exp.Having, exp.AggFunc, exp.Order, exp.Select)
# Maps SEQ expression types to their byte width (suffix indicates bytes: SEQ1=1, SEQ2=2, etc.)
_SEQ_BYTE_WIDTH = {exp.Seq1: 1, exp.Seq2: 2, exp.Seq4: 4, exp.Seq8: 8}

# Templates for generating signed and unsigned SEQ values within a specified range
_SEQ_UNSIGNED: exp.Expr = exp.maybe_parse(":base % :max_val")
_SEQ_SIGNED: exp.Expr = exp.maybe_parse(
    "(CASE WHEN :base % :max_val >= :half "
    "THEN :base % :max_val - :max_val "
    "ELSE :base % :max_val END)"
)
 113
 114
 115def _apply_base64_alphabet_replacements(
 116    result: exp.Expr,
 117    alphabet: exp.Expr | None,
 118    reverse: bool = False,
 119) -> exp.Expr:
 120    """
 121    Apply base64 alphabet character replacements.
 122
 123    Base64 alphabet can be 1-3 chars: 1st = index 62 ('+'), 2nd = index 63 ('/'), 3rd = padding ('=').
 124    zip truncates to the shorter string, so 1-char alphabet only replaces '+', 2-char replaces '+/', etc.
 125
 126    Args:
 127        result: The expression to apply replacements to
 128        alphabet: Custom alphabet literal (expected chars for +/=)
 129        reverse: If False, replace default with custom (encode)
 130                 If True, replace custom with default (decode)
 131    """
 132    if isinstance(alphabet, exp.Literal) and alphabet.is_string:
 133        for default_char, new_char in zip("+/=", alphabet.this):
 134            if new_char != default_char:
 135                find, replace = (new_char, default_char) if reverse else (default_char, new_char)
 136                result = exp.Replace(
 137                    this=result,
 138                    expression=exp.Literal.string(find),
 139                    replacement=exp.Literal.string(replace),
 140                )
 141    return result
 142
 143
 144def _base64_decode_sql(self: DuckDBGenerator, expression: exp.Expr, to_string: bool) -> str:
 145    """
 146    Transpile Snowflake BASE64_DECODE_STRING/BINARY to DuckDB.
 147
 148    DuckDB uses FROM_BASE64() which returns BLOB. For string output, wrap with DECODE().
 149    Custom alphabets require REPLACE() calls to convert to standard base64.
 150    """
 151    input_expr = expression.this
 152    alphabet = expression.args.get("alphabet")
 153
 154    # Handle custom alphabet by replacing non-standard chars with standard ones
 155    input_expr = _apply_base64_alphabet_replacements(input_expr, alphabet, reverse=True)
 156
 157    # FROM_BASE64 returns BLOB
 158    input_expr = exp.FromBase64(this=input_expr)
 159
 160    if to_string:
 161        input_expr = exp.Decode(this=input_expr)
 162
 163    return self.sql(input_expr)
 164
 165
 166def _last_day_sql(self: DuckDBGenerator, expression: exp.LastDay) -> str:
 167    """
 168    DuckDB's LAST_DAY only supports finding the last day of a month.
 169    For other date parts (year, quarter, week), we need to implement equivalent logic.
 170    """
 171    date_expr = expression.this
 172    unit = expression.text("unit")
 173
 174    if not unit or unit.upper() == "MONTH":
 175        # Default behavior - use DuckDB's native LAST_DAY
 176        return self.func("LAST_DAY", date_expr)
 177
 178    if unit.upper() == "YEAR":
 179        # Last day of year: December 31st of the same year
 180        year_expr = exp.func("EXTRACT", "YEAR", date_expr)
 181        make_date_expr = exp.func(
 182            "MAKE_DATE", year_expr, exp.Literal.number(12), exp.Literal.number(31)
 183        )
 184        return self.sql(make_date_expr)
 185
 186    if unit.upper() == "QUARTER":
 187        # Last day of quarter
 188        year_expr = exp.func("EXTRACT", "YEAR", date_expr)
 189        quarter_expr = exp.func("EXTRACT", "QUARTER", date_expr)
 190
 191        # Calculate last month of quarter: quarter * 3. Quarter can be 1 to 4
 192        last_month_expr = exp.Mul(this=quarter_expr, expression=exp.Literal.number(3))
 193        first_day_last_month_expr = exp.func(
 194            "MAKE_DATE", year_expr, last_month_expr, exp.Literal.number(1)
 195        )
 196
 197        # Last day of the last month of the quarter
 198        last_day_expr = exp.func("LAST_DAY", first_day_last_month_expr)
 199        return self.sql(last_day_expr)
 200
 201    if unit.upper() == "WEEK":
 202        # DuckDB DAYOFWEEK: Sunday=0, Monday=1, ..., Saturday=6
 203        dow = exp.func("EXTRACT", "DAYOFWEEK", date_expr)
 204        # Days to the last day of week: (7 - dayofweek) % 7, assuming the last day of week is Sunday (Snowflake)
 205        # Wrap in parentheses to ensure correct precedence
 206        days_to_sunday_expr = exp.Mod(
 207            this=exp.Paren(this=exp.Sub(this=exp.Literal.number(7), expression=dow)),
 208            expression=exp.Literal.number(7),
 209        )
 210        interval_expr = exp.Interval(this=days_to_sunday_expr, unit=exp.var("DAY"))
 211        add_expr = exp.Add(this=date_expr, expression=interval_expr)
 212        cast_expr = exp.cast(add_expr, exp.DType.DATE)
 213        return self.sql(cast_expr)
 214
 215    self.unsupported(f"Unsupported date part '{unit}' in LAST_DAY function")
 216    return self.function_fallback_sql(expression)
 217
 218
 219def _is_nanosecond_unit(unit: exp.Expr | None) -> bool:
 220    return isinstance(unit, (exp.Var, exp.Literal)) and unit.name.upper() == "NANOSECOND"
 221
 222
 223def _handle_nanosecond_diff(
 224    self: DuckDBGenerator,
 225    end_time: exp.Expr,
 226    start_time: exp.Expr,
 227) -> str:
 228    """Generate NANOSECOND diff using EPOCH_NS since DATE_DIFF doesn't support it."""
 229    end_ns = exp.cast(end_time, exp.DType.TIMESTAMP_NS)
 230    start_ns = exp.cast(start_time, exp.DType.TIMESTAMP_NS)
 231
 232    # Build expression tree: EPOCH_NS(end) - EPOCH_NS(start)
 233    return self.sql(
 234        exp.Sub(this=exp.func("EPOCH_NS", end_ns), expression=exp.func("EPOCH_NS", start_ns))
 235    )
 236
 237
def _to_boolean_sql(self: DuckDBGenerator, expression: exp.ToBoolean) -> str:
    """
    Transpile TO_BOOLEAN and TRY_TO_BOOLEAN functions from Snowflake to DuckDB equivalent.

    DuckDB's CAST to BOOLEAN supports most of Snowflake's TO_BOOLEAN strings except 'on'/'off'.
    We need to handle the 'on'/'off' cases explicitly.

    For TO_BOOLEAN (safe=False): NaN and INF values cause errors. We use DuckDB's native ERROR()
    function to replicate this behavior with a clear error message.

    For TRY_TO_BOOLEAN (safe=True): Use DuckDB's TRY_CAST for conversion, which returns NULL
    for invalid inputs instead of throwing errors.
    """
    arg = expression.this
    is_safe = expression.args.get("safe", False)

    # Shared CASE arms: map 'on'/'off' case-insensitively (via UPPER over a
    # VARCHAR cast) before any generic BOOLEAN cast is attempted
    base_case_expr = (
        exp.case()
        .when(
            # Handle 'on' -> TRUE (case insensitive)
            exp.Upper(this=exp.cast(arg, exp.DType.VARCHAR)).eq(exp.Literal.string("ON")),
            exp.true(),
        )
        .when(
            # Handle 'off' -> FALSE (case insensitive)
            exp.Upper(this=exp.cast(arg, exp.DType.VARCHAR)).eq(exp.Literal.string("OFF")),
            exp.false(),
        )
    )

    if is_safe:
        # TRY_TO_BOOLEAN: handle 'on'/'off' and use TRY_CAST for everything else
        case_expr = base_case_expr.else_(exp.func("TRY_CAST", arg, exp.DType.BOOLEAN.into_expr()))
    else:
        # TO_BOOLEAN: handle NaN/INF errors, 'on'/'off', and use regular CAST.
        # TRY_CAST to FLOAT yields NULL for non-numeric input, so ISNAN/ISINF
        # below only fire for genuinely numeric NaN/INF values.
        cast_to_real = exp.func("TRY_CAST", arg, exp.DType.FLOAT.into_expr())

        # Check for NaN and INF values
        nan_inf_check = exp.Or(
            this=exp.func("ISNAN", cast_to_real), expression=exp.func("ISINF", cast_to_real)
        )

        case_expr = base_case_expr.when(
            nan_inf_check,
            exp.func(
                "ERROR",
                exp.Literal.string("TO_BOOLEAN: Non-numeric values NaN and INF are not supported"),
            ),
        ).else_(exp.cast(arg, exp.DType.BOOLEAN))

    return self.sql(case_expr)
 289
 290
# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDBGenerator, expression: exp.Date) -> str:
    this = expression.this
    # NOTE(review): `zone` here is already-rendered SQL (a string), which is
    # stored as-is in the AtTimeZone arg — confirm the generator emits it verbatim
    zone = self.sql(expression, "zone")

    if zone:
        # BigQuery considers "this" at UTC, converts it to the specified
        # time zone and then keeps only the DATE part
        # To mimic that, we:
        #   (1) Cast to TIMESTAMP to remove DuckDB's local tz
        #   (2) Apply consecutive AtTimeZone calls for UTC -> zone conversion
        this = exp.cast(this, exp.DType.TIMESTAMP)
        at_utc = exp.AtTimeZone(this=this, zone=exp.Literal.string("UTC"))
        this = exp.AtTimeZone(this=at_utc, zone=zone)

    return self.sql(exp.cast(expression=this, to=exp.DType.DATE))
 307
 308
 309# BigQuery -> DuckDB conversion for the TIME_DIFF function
 310def _timediff_sql(self: DuckDBGenerator, expression: exp.TimeDiff) -> str:
 311    unit = expression.unit
 312
 313    if _is_nanosecond_unit(unit):
 314        return _handle_nanosecond_diff(self, expression.expression, expression.this)
 315
 316    this = exp.cast(expression.this, exp.DType.TIME)
 317    expr = exp.cast(expression.expression, exp.DType.TIME)
 318
 319    # Although the 2 dialects share similar signatures, BQ seems to inverse
 320    # the sign of the result so the start/end time operands are flipped
 321    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
 322
 323
def _date_delta_to_binary_interval_op(
    cast: bool = True,
) -> t.Callable[[DuckDBGenerator, DATETIME_DELTA], str]:
    """
    DuckDB override to handle:
    1. NANOSECOND operations (DuckDB doesn't support INTERVAL ... NANOSECOND)
    2. Float/decimal interval values (DuckDB INTERVAL requires integers)

    Args:
        cast: Forwarded to the generic dialect helper.

    Returns:
        A generator function for date add/sub expressions.
    """
    base_impl = date_delta_to_binary_interval_op(cast=cast)

    def _duckdb_date_delta_sql(self: DuckDBGenerator, expression: DATETIME_DELTA) -> str:
        unit = expression.unit
        interval_value = expression.expression

        # Handle NANOSECOND unit (DuckDB doesn't support INTERVAL ... NANOSECOND):
        # convert to epoch nanoseconds, add the raw value, rebuild the timestamp
        if _is_nanosecond_unit(unit):
            if isinstance(interval_value, exp.Interval):
                # Unwrap the interval to get the raw nanosecond count
                interval_value = interval_value.this

            timestamp_ns = exp.cast(expression.this, exp.DType.TIMESTAMP_NS)

            return self.sql(
                exp.func(
                    "MAKE_TIMESTAMP_NS",
                    exp.Add(this=exp.func("EPOCH_NS", timestamp_ns), expression=interval_value),
                )
            )

        # Handle float/decimal interval values as DuckDB INTERVAL requires integer expressions
        if not interval_value or isinstance(interval_value, exp.Interval):
            return base_impl(self, expression)

        if interval_value.is_type(*exp.DataType.REAL_TYPES):
            # Round to the nearest integer; mutates the expression in place
            expression.set("expression", exp.cast(exp.func("ROUND", interval_value), "INT"))

        return base_impl(self, expression)

    return _duckdb_date_delta_sql
 362
 363
def _array_insert_sql(self: DuckDBGenerator, expression: exp.ArrayInsert) -> str:
    """
    Transpile ARRAY_INSERT to DuckDB using LIST_CONCAT and slicing.

    Handles:
    - 0-based and 1-based indexing (normalizes to 0-based for calculations)
    - Negative position conversion (requires array length)
    - NULL propagation (source dialects return NULL, DuckDB creates single-element array)
    - Assumes position is within bounds per user constraint

    Note: All dialects that support ARRAY_INSERT (Snowflake, Spark, Databricks) have
    ARRAY_FUNCS_PROPAGATES_NULLS=True, so we always assume source propagates NULLs.

    Args:
        expression: The ArrayInsert expression to transpile.

    Returns:
        SQL string implementing ARRAY_INSERT behavior.
    """
    this = expression.this
    position = expression.args.get("position")
    element = expression.expression
    element_array = exp.Array(expressions=[element])
    # offset=1 for 1-based source dialects (e.g. Spark); 0 for 0-based ones
    index_offset = expression.args.get("offset", 0)

    if not position or not position.is_int:
        # Only literal integer positions can be transpiled; emit as-is otherwise
        self.unsupported("ARRAY_INSERT can only be transpiled with a literal position")
        return self.func("ARRAY_INSERT", this, position, element)

    pos_value = position.to_py()

    # Normalize one-based indexing to zero-based for slice calculations
    # Spark (1-based) -> Snowflake (0-based):
    #   Positive: pos=1 -> pos=0 (subtract 1)
    #   Negative: pos=-2 -> pos=-1 (add 1)
    # Example: Spark array_insert([a,b,c], -2, d) -> [a,b,d,c] is same as Snowflake pos=-1
    if pos_value > 0:
        pos_value = pos_value - index_offset
    elif pos_value < 0:
        pos_value = pos_value + index_offset

    # Build the appropriate list_concat expression based on position
    if pos_value == 0:
        # insert at beginning
        concat_exprs = [element_array, this]
    elif pos_value > 0:
        # Positive position: LIST_CONCAT(arr[1:pos], [elem], arr[pos+1:])
        # 0-based -> DuckDB 1-based slicing

        # left slice: arr[1:pos]
        slice_start = exp.Bracket(
            this=this,
            expressions=[
                exp.Slice(this=exp.Literal.number(1), expression=exp.Literal.number(pos_value))
            ],
        )

        # right slice: arr[pos+1:]
        slice_end = exp.Bracket(
            this=this, expressions=[exp.Slice(this=exp.Literal.number(pos_value + 1))]
        )

        concat_exprs = [slice_start, element_array, slice_end]
    else:
        # Negative position: arr[1:LEN(arr)+pos], [elem], arr[LEN(arr)+pos+1:]
        # pos=-1 means insert before last element
        arr_len = exp.Length(this=this)

        # Calculate slice position: LEN(arr) + pos (e.g., LEN(arr) + (-1) = LEN(arr) - 1)
        slice_end_pos = arr_len + exp.Literal.number(pos_value)
        slice_start_pos = slice_end_pos + exp.Literal.number(1)

        # left slice: arr[1:LEN(arr)+pos]
        slice_start = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(1), expression=slice_end_pos)],
        )

        # right slice: arr[LEN(arr)+pos+1:]
        slice_end = exp.Bracket(this=this, expressions=[exp.Slice(this=slice_start_pos)])

        concat_exprs = [slice_start, element_array, slice_end]

    # All dialects that support ARRAY_INSERT propagate NULLs (Snowflake/Spark/Databricks)
    # Wrap in CASE WHEN array IS NULL THEN NULL ELSE func_expr END
    return self.sql(
        exp.If(
            this=exp.Is(this=this, expression=exp.Null()),
            true=exp.Null(),
            false=self.func("LIST_CONCAT", *concat_exprs),
        )
    )
 456
 457
def _array_remove_at_sql(self: DuckDBGenerator, expression: exp.ArrayRemoveAt) -> str:
    """
    Transpile ARRAY_REMOVE_AT to DuckDB using LIST_CONCAT and slicing.

    Handles:
    - Positive positions (0-based indexing)
    - Negative positions (from end of array)
    - NULL propagation (Snowflake returns NULL for NULL array, DuckDB doesn't auto-propagate)
    - Only supports literal integer positions (non-literals remain untranspiled)

    Transpilation patterns:
    - pos=0 (first): arr[2:]
    - pos>0 (middle): LIST_CONCAT(arr[1:p], arr[p+2:])
    - pos=-1 (last): arr[1:LEN(arr)-1]
    - pos<-1: LIST_CONCAT(arr[1:LEN(arr)+p], arr[LEN(arr)+p+2:])

    All wrapped in: CASE WHEN arr IS NULL THEN NULL ELSE ... END

    Args:
        expression: The ArrayRemoveAt expression to transpile.

    Returns:
        SQL string implementing ARRAY_REMOVE_AT behavior.
    """
    this = expression.this
    position = expression.args.get("position")

    if not position or not position.is_int:
        # Dynamic positions can't be turned into static slices; emit as-is
        self.unsupported("ARRAY_REMOVE_AT can only be transpiled with a literal position")
        return self.func("ARRAY_REMOVE_AT", this, position)

    pos_value = position.to_py()

    # Build the appropriate expression based on position
    if pos_value == 0:
        # Remove first element: arr[2:]
        result_expr: exp.Expr | str = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(2))],
        )
    elif pos_value > 0:
        # Remove at positive position: LIST_CONCAT(arr[1:pos], arr[pos+2:])
        # DuckDB uses 1-based slicing
        left_slice = exp.Bracket(
            this=this,
            expressions=[
                exp.Slice(this=exp.Literal.number(1), expression=exp.Literal.number(pos_value))
            ],
        )
        right_slice = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(pos_value + 2))],
        )
        result_expr = self.func("LIST_CONCAT", left_slice, right_slice)
    elif pos_value == -1:
        # Remove last element: arr[1:LEN(arr)-1]
        # Optimization: simpler than general negative case
        arr_len = exp.Length(this=this)
        slice_end = arr_len + exp.Literal.number(-1)
        result_expr = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(1), expression=slice_end)],
        )
    else:
        # Remove at negative position: LIST_CONCAT(arr[1:LEN(arr)+pos], arr[LEN(arr)+pos+2:])
        arr_len = exp.Length(this=this)
        slice_end_pos = arr_len + exp.Literal.number(pos_value)
        slice_start_pos = slice_end_pos + exp.Literal.number(2)

        left_slice = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=exp.Literal.number(1), expression=slice_end_pos)],
        )
        right_slice = exp.Bracket(
            this=this,
            expressions=[exp.Slice(this=slice_start_pos)],
        )
        result_expr = self.func("LIST_CONCAT", left_slice, right_slice)

    # Snowflake ARRAY_FUNCS_PROPAGATES_NULLS=True, so wrap in NULL check
    # CASE WHEN array IS NULL THEN NULL ELSE result_expr END
    return self.sql(
        exp.If(
            this=exp.Is(this=this, expression=exp.Null()),
            true=exp.Null(),
            false=result_expr,
        )
    )
 546
 547
# The decorator warns when a comparator lambda is supplied; only the array
# argument itself is forwarded to DuckDB's ARRAY_SORT.
@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDBGenerator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)
 551
 552
 553def _array_contains_sql(self: DuckDBGenerator, expression: exp.ArrayContains) -> str:
 554    this = expression.this
 555    expr = expression.expression
 556
 557    func = self.func("ARRAY_CONTAINS", this, expr)
 558
 559    if expression.args.get("check_null"):
 560        check_null_in_array = exp.Nullif(
 561            this=exp.NEQ(this=exp.ArraySize(this=this), expression=exp.func("LIST_COUNT", this)),
 562            expression=exp.false(),
 563        )
 564        return self.sql(exp.If(this=expr.is_(exp.Null()), true=check_null_in_array, false=func))
 565
 566    return func
 567
 568
def _array_overlaps_sql(self: DuckDBGenerator, expression: exp.ArrayOverlaps) -> str:
    """
    Translates Snowflake's NULL-safe ARRAYS_OVERLAP to DuckDB.

    DuckDB's native && operator is not NULL-safe: [1,NULL,3] && [NULL,4,5] returns FALSE.
    Snowflake returns TRUE when both arrays contain NULL (NULLs are treated as known values).

    Generated SQL: (arr1 && arr2) OR (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))

    ARRAY_LENGTH counts all elements (including NULLs); LIST_COUNT counts only non-NULLs.
    When they differ, the array contains at least one NULL, matching Snowflake's NULL-safe semantics.
    """
    if not expression.args.get("null_safe"):
        # Plain overlap: emit DuckDB's native && operator
        return self.binary(expression, "&&")

    arr1 = expression.this
    arr2 = expression.expression

    # copy() each reference so the same AST node isn't shared between branches
    check_nulls = exp.and_(
        exp.NEQ(
            this=exp.ArraySize(this=arr1.copy()),
            expression=exp.func("LIST_COUNT", arr1.copy()),
        ),
        exp.NEQ(
            this=exp.ArraySize(this=arr2.copy()),
            expression=exp.func("LIST_COUNT", arr2.copy()),
        ),
        copy=False,
    )

    # Re-emit the overlap itself without null_safe so it renders as &&
    overlap = exp.ArrayOverlaps(this=arr1.copy(), expression=arr2.copy())

    return self.sql(
        exp.or_(
            exp.paren(overlap, copy=False),
            exp.paren(check_nulls, copy=False),
            copy=False,
            wrap=False,
        )
    )
 609
 610
def _struct_sql(self: DuckDBGenerator, expression: exp.Struct) -> str:
    """Generate DuckDB struct syntax: `{'k': v, ...}` literals, or ROW(...) for BigQuery inline structs."""
    # Find the nearest enclosing Cast, stopping at a Select boundary so casts
    # outside the current scope aren't picked up
    ancestor_cast = expression.find_ancestor(exp.Cast, exp.Select)
    ancestor_cast = None if isinstance(ancestor_cast, exp.Select) else ancestor_cast

    # Empty struct cast works with MAP() since DuckDB can't parse {}
    if not expression.expressions:
        if isinstance(ancestor_cast, exp.Cast) and ancestor_cast.to.is_type(exp.DType.MAP):
            return "MAP()"

    args: list[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    #  1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    #  2. A cast to STRUCT / ARRAY of STRUCTs is found
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DType.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        this = expr.this
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            # ROW(...) takes positional values only; field names come from the cast
            args.append(self.sql(value))
        else:
            if isinstance(this, exp.Identifier):
                # Render identifier keys as quoted string literals
                key = self.sql(exp.Literal.string(expr.name))
            elif is_property_eq:
                key = self.sql(this)
            else:
                # Unnamed fields get positional names: '_0', '_1', ...
                key = self.sql(exp.Literal.string(f"_{i}"))

            args.append(f"{key}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"
 656
 657
 658def _datatype_sql(self: DuckDBGenerator, expression: exp.DataType) -> str:
 659    if expression.is_type("array"):
 660        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"
 661
 662    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
 663    if expression.is_type(exp.DType.TIME, exp.DType.TIMETZ, exp.DType.TIMESTAMPTZ):
 664        return expression.this.value
 665
 666    return self.datatype_sql(expression)
 667
 668
 669def _json_format_sql(self: DuckDBGenerator, expression: exp.JSONFormat) -> str:
 670    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
 671    return f"CAST({sql} AS TEXT)"
 672
 673
 674def _build_seq_expression(base: exp.Expr, byte_width: int, signed: bool) -> exp.Expr:
 675    """Build a SEQ expression with the given base, byte width, and signedness."""
 676    bits = byte_width * 8
 677    max_val = exp.Literal.number(2**bits)
 678
 679    if signed:
 680        half = exp.Literal.number(2 ** (bits - 1))
 681        return exp.replace_placeholders(_SEQ_SIGNED.copy(), base=base, max_val=max_val, half=half)
 682    return exp.replace_placeholders(_SEQ_UNSIGNED.copy(), base=base, max_val=max_val)
 683
 684
 685def _seq_to_range_in_generator(expression: exp.Expr) -> exp.Expr:
 686    """
 687    Transform SEQ functions to `range` column references when inside a GENERATOR context.
 688
 689    When GENERATOR(ROWCOUNT => N) becomes RANGE(N) in DuckDB, it produces a column
 690    named `range` with values 0, 1, ..., N-1. SEQ functions produce the same sequence,
 691    so we replace them with `range % max_val` to avoid nested window function issues.
 692    """
 693    if not isinstance(expression, exp.Select):
 694        return expression
 695
 696    from_ = expression.args.get("from_")
 697    if not (
 698        from_
 699        and isinstance(from_.this, exp.TableFromRows)
 700        and isinstance(from_.this.this, exp.Generator)
 701    ):
 702        return expression
 703
 704    def replace_seq(node: exp.Expr) -> exp.Expr:
 705        if isinstance(node, (exp.Seq1, exp.Seq2, exp.Seq4, exp.Seq8)):
 706            byte_width = _SEQ_BYTE_WIDTH[type(node)]
 707            return _build_seq_expression(exp.column("range"), byte_width, signed=node.name == "1")
 708        return node
 709
 710    return expression.transform(replace_seq, copy=False)
 711
 712
def _seq_sql(self: DuckDBGenerator, expression: exp.Func, byte_width: int) -> str:
    """
    Transpile Snowflake SEQ1/SEQ2/SEQ4/SEQ8 to DuckDB.

    Generates monotonically increasing integers starting from 0.
    The signed parameter (0 or 1) affects wrap-around behavior:
    - Unsigned (0): wraps at 2^(bits) - 1
    - Signed (1): wraps at 2^(bits-1) - 1, then goes negative

    Args:
        expression: The SEQ function call being transpiled.
        byte_width: Width of the sequence integer in bytes (1, 2, 4 or 8).
    """
    # Warn if SEQ is in a restricted context (Select stops search at current scope).
    # An Order ancestor only counts as restricted when it belongs to a window spec.
    ancestor = expression.find_ancestor(*_SEQ_RESTRICTED)
    if ancestor and (
        (not isinstance(ancestor, (exp.Order, exp.Select)))
        or (isinstance(ancestor, exp.Order) and isinstance(ancestor.parent, exp.Window))
    ):
        self.unsupported("SEQ in restricted context is not supported - use CTE or subquery")

    # SEQ*(1) requests the signed variant; the call argument's text is "1"
    result = _build_seq_expression(_SEQ_BASE.copy(), byte_width, signed=expression.name == "1")
    return self.sql(result)
 732
 733
def _unix_to_time_sql(self: DuckDBGenerator, expression: exp.UnixToTime) -> str:
    """Convert UNIX_TO_TIME (epoch value -> timestamp) to DuckDB SQL."""
    scale = expression.args.get("scale")
    timestamp = expression.this
    target_type = expression.args.get("target_type")

    # Check if we need NTZ (naive timestamp in UTC)
    is_ntz = target_type and target_type.this in (
        exp.DType.TIMESTAMP,
        exp.DType.TIMESTAMPNTZ,
    )

    # NOTE(review): the MILLIS/MICROS fast paths return before is_ntz is
    # consulted — per the comments they already yield naive timestamps, but
    # confirm that matches the source dialect's target_type semantics
    if scale == exp.UnixToTime.MILLIS:
        # EPOCH_MS already returns TIMESTAMP (naive, UTC)
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        # MAKE_TIMESTAMP already returns TIMESTAMP (naive, UTC)
        return self.func("MAKE_TIMESTAMP", timestamp)

    # Other scales: divide and use TO_TIMESTAMP
    if scale not in (None, exp.UnixToTime.SECONDS):
        timestamp = exp.Div(this=timestamp, expression=exp.func("POW", 10, scale))

    # Anonymous keeps TO_TIMESTAMP verbatim so no dialect mapping rewrites it
    to_timestamp: exp.Expr = exp.Anonymous(this="TO_TIMESTAMP", expressions=[timestamp])

    if is_ntz:
        # AT TIME ZONE 'UTC' strips the tz, producing a naive UTC timestamp
        to_timestamp = exp.AtTimeZone(this=to_timestamp, zone=exp.Literal.string("UTC"))

    return self.sql(to_timestamp)
 762
 763
 764WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In, exp.Not)
 765
 766
 767def _arrow_json_extract_sql(self: DuckDBGenerator, expression: JSON_EXTRACT_TYPE) -> str:
 768    arrow_sql = arrow_json_extract_sql(self, expression)
 769    if not expression.same_parent and isinstance(
 770        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
 771    ):
 772        arrow_sql = self.wrap(arrow_sql)
 773    return arrow_sql
 774
 775
 776def _implicit_datetime_cast(
 777    arg: exp.Expr | None, type: exp.DType = exp.DType.DATE
 778) -> exp.Expr | None:
 779    if isinstance(arg, exp.Literal) and arg.is_string:
 780        ts = arg.name
 781        if type == exp.DType.DATE and ":" in ts:
 782            type = exp.DType.TIMESTAMPTZ if TIMEZONE_PATTERN.search(ts) else exp.DType.TIMESTAMP
 783
 784        arg = exp.cast(arg, type)
 785
 786    return arg
 787
 788
 789def _week_unit_to_dow(unit: exp.Expr | None) -> int | None:
 790    """
 791    Compute the Monday-based day shift to align DATE_DIFF('WEEK', ...) coming
 792    from other dialects, e.g BigQuery's WEEK(<day>) or ISOWEEK unit parts.
 793
 794    Args:
 795        unit: The unit expression (Var for ISOWEEK or WeekStart)
 796
 797    Returns:
 798        The ISO 8601 day number (Monday=1, Sunday=7 etc) or None if not a week unit or if day is dynamic (not a constant).
 799
 800        Examples:
 801            "WEEK(SUNDAY)" -> 7
 802            "WEEK(MONDAY)" -> 1
 803            "ISOWEEK" -> 1
 804    """
 805    # Handle plain Var expressions for ISOWEEK only
 806    if isinstance(unit, exp.Var) and unit.name.upper() in "ISOWEEK":
 807        return 1
 808
 809    # Handle WeekStart expressions with explicit day
 810    if isinstance(unit, exp.WeekStart):
 811        return WEEK_START_DAY_TO_DOW.get(unit.name.upper())
 812
 813    return None
 814
 815
 816def _build_week_trunc_expression(
 817    date_expr: exp.Expr,
 818    start_dow: int,
 819    preserve_start_day: bool = False,
 820) -> exp.Expr:
 821    """
 822    Build DATE_TRUNC expression for week boundaries with custom start day.
 823
 824    DuckDB's DATE_TRUNC('WEEK', ...) always returns Monday. To align to a different
 825    start day, we shift the date before truncating.
 826
 827    Args:
 828        date_expr: The date expression to truncate.
 829        start_dow: ISO 8601 day-of-week number (Monday=1, ..., Sunday=7).
 830        preserve_start_day: If True, reverse the shift after truncating so the result lands on the
 831            correct week start day. Needed for DATE_TRUNC (absolute result matters) but
 832            not for DATE_DIFF (only relative alignment matters).
 833
 834    Shift formula: Sunday (7) gets +1, others get (1 - start_dow).
 835    """
 836    shift_days = 1 if start_dow == 7 else 1 - start_dow
 837    truncated = exp.func("DATE_TRUNC", unit=exp.var("WEEK"), this=date_expr)
 838
 839    if shift_days == 0:
 840        return truncated
 841
 842    shift = exp.Interval(this=exp.Literal.string(str(shift_days)), unit=exp.var("DAY"))
 843    shifted_date = exp.DateAdd(this=date_expr, expression=shift)
 844    truncated.set("this", shifted_date)
 845
 846    if preserve_start_day:
 847        interval = exp.Interval(this=exp.Literal.string(str(-shift_days)), unit=exp.var("DAY"))
 848        return exp.cast(
 849            exp.DateAdd(this=truncated, expression=interval), to=exp.DType.DATE, copy=False
 850        )
 851
 852    return truncated
 853
 854
 855def _date_diff_sql(self: DuckDBGenerator, expression: exp.DateDiff | exp.DatetimeDiff) -> str:
 856    unit = expression.unit
 857
 858    if _is_nanosecond_unit(unit):
 859        return _handle_nanosecond_diff(self, expression.this, expression.expression)
 860
 861    this = _implicit_datetime_cast(expression.this)
 862    expr = _implicit_datetime_cast(expression.expression)
 863
 864    # DuckDB's WEEK diff does not respect Monday crossing (week boundaries), it checks (end_day - start_day) / 7:
 865    #  SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed)
 866    #  SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-20' AS DATE)) --> 1 (7 days difference)
 867    # Whereas for other units such as MONTH it does respect month boundaries:
 868    #  SELECT DATE_DIFF('MONTH', CAST('2024-11-30' AS DATE), CAST('2024-12-01' AS DATE)) --> 1 (Month crossed)
 869    date_part_boundary = expression.args.get("date_part_boundary")
 870
 871    # Extract week start day; returns None if day is dynamic (column/placeholder)
 872    week_start = _week_unit_to_dow(unit)
 873    if date_part_boundary and week_start and this and expr:
 874        expression.set("unit", exp.Literal.string("WEEK"))
 875
 876        # Truncate both dates to week boundaries to respect input dialect semantics
 877        this = _build_week_trunc_expression(this, week_start)
 878        expr = _build_week_trunc_expression(expr, week_start)
 879
 880    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
 881
 882
 883def _generate_datetime_array_sql(
 884    self: DuckDBGenerator, expression: exp.GenerateDateArray | exp.GenerateTimestampArray
 885) -> str:
 886    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)
 887
 888    type = exp.DType.DATE if is_generate_date_array else exp.DType.TIMESTAMP
 889    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
 890    end = _implicit_datetime_cast(expression.args.get("end"), type=type)
 891
 892    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES
 893    gen_series: exp.GenerateSeries | exp.Cast = exp.GenerateSeries(
 894        start=start, end=end, step=expression.args.get("step")
 895    )
 896
 897    if is_generate_date_array:
 898        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
 899        # GENERATE_DATE_ARRAY we must cast it back to DATE array
 900        gen_series = exp.cast(gen_series, exp.DataType.from_str("ARRAY<DATE>"))
 901
 902    return self.sql(gen_series)
 903
 904
 905def _json_extract_value_array_sql(
 906    self: DuckDBGenerator, expression: exp.JSONValueArray | exp.JSONExtractArray
 907) -> str:
 908    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
 909    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
 910    return self.sql(exp.cast(json_extract, to=exp.DataType.from_str(data_type)))
 911
 912
 913def _cast_to_varchar(arg: exp.Expr | None) -> exp.Expr | None:
 914    if arg and arg.type and not arg.is_type(*exp.DataType.TEXT_TYPES, exp.DType.UNKNOWN):
 915        return exp.cast(arg, exp.DType.VARCHAR)
 916    return arg
 917
 918
 919def _cast_to_boolean(arg: exp.Expr | None) -> exp.Expr | None:
 920    if arg and not arg.is_type(exp.DType.BOOLEAN):
 921        return exp.cast(arg, exp.DType.BOOLEAN)
 922    return arg
 923
 924
 925def _is_binary(arg: exp.Expr) -> bool:
 926    return arg.is_type(
 927        exp.DType.BINARY,
 928        exp.DType.VARBINARY,
 929        exp.DType.BLOB,
 930    )
 931
 932
 933def _gen_with_cast_to_blob(self: DuckDBGenerator, expression: exp.Expr, result_sql: str) -> str:
 934    if _is_binary(expression):
 935        blob = exp.DataType.from_str("BLOB", dialect="duckdb")
 936        result_sql = self.sql(exp.Cast(this=result_sql, to=blob))
 937    return result_sql
 938
 939
 940def _cast_to_bit(arg: exp.Expr) -> exp.Expr:
 941    if not _is_binary(arg):
 942        return arg
 943
 944    if isinstance(arg, exp.HexString):
 945        arg = exp.Unhex(this=exp.Literal.string(arg.this))
 946
 947    return exp.cast(arg, exp.DType.BIT)
 948
 949
 950def _prepare_binary_bitwise_args(expression: exp.Binary) -> None:
 951    if _is_binary(expression.this):
 952        expression.set("this", _cast_to_bit(expression.this))
 953    if _is_binary(expression.expression):
 954        expression.set("expression", _cast_to_bit(expression.expression))
 955
 956
def _day_navigation_sql(self: DuckDBGenerator, expression: exp.NextDay | exp.PreviousDay) -> str:
    """
    Transpile Snowflake's NEXT_DAY / PREVIOUS_DAY to DuckDB using date arithmetic.

    Returns the DATE of the next/previous occurrence of the specified weekday.

    Formulas:
    - NEXT_DAY: (target_dow - current_dow + 6) % 7 + 1
    - PREVIOUS_DAY: (current_dow - target_dow + 6) % 7 + 1

    Supports both literal and non-literal day names:
    - Literal: Direct lookup (e.g., 'Monday' -> 1)
    - Non-literal: CASE statement for runtime evaluation

    Examples:
        NEXT_DAY('2024-01-01' (Monday), 'Monday')
          -> (1 - 1 + 6) % 7 + 1 = 6 % 7 + 1 = 7 days -> 2024-01-08

        PREVIOUS_DAY('2024-01-15' (Monday), 'Friday')
          -> (1 - 5 + 6) % 7 + 1 = 2 % 7 + 1 = 3 days -> 2024-01-12
    """
    date_expr = expression.this
    day_name_expr = expression.expression

    # Build ISODOW call for current day of week (ISO 8601: Monday=1 ... Sunday=7)
    isodow_call = exp.func("ISODOW", date_expr)

    # Determine target day of week
    if isinstance(day_name_expr, exp.Literal):
        # Literal day name: lookup target_dow directly.
        # Prefix matching allows abbreviated day names (e.g. 'Mon' matches 'MONDAY').
        day_name_str = day_name_expr.name.upper()
        matching_day = next(
            (day for day in WEEK_START_DAY_TO_DOW if day.startswith(day_name_str)), None
        )
        if matching_day:
            target_dow: exp.Expr = exp.Literal.number(WEEK_START_DAY_TO_DOW[matching_day])
        else:
            # Unrecognized day name, use fallback
            return self.function_fallback_sql(expression)
    else:
        # Non-literal day name: build CASE statement for runtime mapping.
        # Matches on the first two letters of each weekday name; no ELSE branch,
        # so an unmatched runtime value presumably yields NULL — TODO confirm.
        upper_day_name = exp.Upper(this=day_name_expr)
        target_dow = exp.Case(
            ifs=[
                exp.If(
                    this=exp.func(
                        "STARTS_WITH", upper_day_name.copy(), exp.Literal.string(day[:2])
                    ),
                    true=exp.Literal.number(dow_num),
                )
                for day, dow_num in WEEK_START_DAY_TO_DOW.items()
            ]
        )

    # Calculate days offset and apply interval based on direction.
    # NOTE: +, -, % on exp nodes are operator overloads that build AST nodes,
    # not Python arithmetic; exp.paren(..., copy=False) preserves grouping.
    if isinstance(expression, exp.NextDay):
        # NEXT_DAY: (target_dow - current_dow + 6) % 7 + 1
        days_offset = exp.paren(target_dow - isodow_call + 6, copy=False) % 7 + 1
        date_with_offset = date_expr + exp.Interval(this=days_offset, unit=exp.var("DAY"))
    else:  # exp.PreviousDay
        # PREVIOUS_DAY: (current_dow - target_dow + 6) % 7 + 1
        days_offset = exp.paren(isodow_call - target_dow + 6, copy=False) % 7 + 1
        date_with_offset = date_expr - exp.Interval(this=days_offset, unit=exp.var("DAY"))

    # Build final: CAST(date_with_offset AS DATE)
    return self.sql(exp.cast(date_with_offset, exp.DType.DATE))
1023
1024
def _anyvalue_sql(self: DuckDBGenerator, expression: exp.AnyValue) -> str:
    """Map ANY_VALUE(expr HAVING MAX/MIN having_expr) onto DuckDB's ARG_MAX_NULL/ARG_MIN_NULL."""
    inner = expression.this
    if not isinstance(inner, exp.HavingMax):
        return self.function_fallback_sql(expression)
    name = "ARG_MAX_NULL" if inner.args.get("max") else "ARG_MIN_NULL"
    return self.func(name, inner.this, inner.expression)
1032
1033
def _bitwise_agg_sql(
    self: DuckDBGenerator,
    expression: exp.BitwiseOrAgg | exp.BitwiseAndAgg | exp.BitwiseXorAgg,
) -> str:
    """
    Render BIT_OR/BIT_AND/BIT_XOR, coercing non-integer inputs to INT.

    DuckDB's bitwise aggregate functions only accept integer types. For other types:
    - DECIMAL/STRING: CAST(arg AS INT) converts directly, rounding to nearest int
    - FLOAT/DOUBLE: ROUND(arg)::INT first, required due to float precision loss
    """
    if isinstance(expression, exp.BitwiseAndAgg):
        name = "BIT_AND"
    elif isinstance(expression, exp.BitwiseXorAgg):
        name = "BIT_XOR"
    else:  # exp.BitwiseOrAgg
        name = "BIT_OR"

    arg = expression.this

    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

    if arg.is_type(*exp.DataType.REAL_TYPES, *exp.DataType.TEXT_TYPES):
        if arg.is_type(*exp.DataType.FLOAT_TYPES):
            # Round floats before the INT cast to avoid precision-loss surprises
            arg = exp.func("ROUND", arg)
        arg = exp.cast(arg, exp.DType.INT)

    return self.func(name, arg)
1065
1066
def _literal_sql_with_ws_chr(self: DuckDBGenerator, literal: str) -> str:
    """
    Render a string literal, splicing in CHR(<code>) calls for whitespace control chars.

    DuckDB does not support \\uXXXX escapes, so those characters are emitted as
    CHR() concatenations rather than embedded in the literal directly.
    """
    if all(ch not in WS_CONTROL_CHARS_TO_DUCK for ch in literal):
        return self.sql(exp.Literal.string(literal))

    pieces: list[str] = []
    for is_control, chunk in groupby(literal, key=WS_CONTROL_CHARS_TO_DUCK.__contains__):
        if is_control:
            pieces.extend(
                self.func("CHR", exp.Literal.number(str(WS_CONTROL_CHARS_TO_DUCK[ch])))
                for ch in chunk
            )
        else:
            pieces.append(self.sql(exp.Literal.string("".join(chunk))))

    joined = " || ".join(pieces)
    return joined if len(pieces) == 1 else f"({joined})"
1083
1084
def _escape_regex_metachars(
    self: DuckDBGenerator, delimiters: exp.Expr | None, delimiters_sql: str
) -> str:
    r"""
    Escape the regex metacharacters \ - ^ [ ] so delimiters are safe inside character classes.

    String literals are escaped at transpile time; any other expression is wrapped
    in nested REPLACE() calls so escaping happens at query runtime.
    """
    if not delimiters:
        return delimiters_sql

    if delimiters.is_string:
        escaped = "".join(REGEX_ESCAPE_REPLACEMENTS.get(ch, ch) for ch in delimiters.this)
        return _literal_sql_with_ws_chr(self, escaped)

    sql = delimiters_sql
    for raw, replacement in REGEX_ESCAPE_REPLACEMENTS.items():
        sql = self.func(
            "REPLACE",
            sql,
            self.sql(exp.Literal.string(raw)),
            self.sql(exp.Literal.string(replacement)),
        )

    return sql
1111
1112
def _build_capitalization_sql(
    self: DuckDBGenerator,
    value_to_split: str,
    delimiters_sql: str,
) -> str:
    """
    Build the INITCAP capitalization SQL: split on delimiters, capitalize each word.

    REGEXP_EXTRACT_ALL produces alternating delimiter / non-delimiter segments, but
    whether index 0 is a delimiter depends on the string's first character. A CASE
    on REGEXP_MATCHES of that first char decides whether even or odd indexes are
    the word segments to capitalize.
    """
    # Empty string delimiter --> treat the whole value as a single word, no split needed
    if delimiters_sql == "''":
        return f"UPPER(LEFT({value_to_split}, 1)) || LOWER(SUBSTRING({value_to_split}, 2))"

    first_char_regex_sql = f"CONCAT('[', {delimiters_sql}, ']')"
    segment_regex_sql = f"CONCAT('([', {delimiters_sql}, ']+|[^', {delimiters_sql}, ']+)')"

    def capitalize_segments(word_parity: int) -> str:
        # Capitalize segments whose list index has the given parity
        lambda_sql = (
            f"(seg, idx) -> CASE WHEN idx % 2 = {word_parity} "
            "THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END"
        )
        return self.func(
            "LIST_TRANSFORM",
            self.func("REGEXP_EXTRACT_ALL", value_to_split, segment_regex_sql),
            lambda_sql,
        )

    return self.func(
        "ARRAY_TO_STRING",
        exp.case()
        .when(
            f"REGEXP_MATCHES(LEFT({value_to_split}, 1), {first_char_regex_sql})",
            capitalize_segments(0),
        )
        .else_(capitalize_segments(1)),
        "''",
    )
1148
1149
def _initcap_sql(self: DuckDBGenerator, expression: exp.Initcap) -> str:
    """Render INITCAP by escaping the delimiter set and delegating to the capitalization builder."""
    value_sql = self.sql(expression, "this")

    delimiters = expression.args.get("expression")
    if delimiters is None:
        # Manually built exp.Initcap nodes may omit delimiters; use the dialect default set
        delimiters = exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)

    escaped_sql = _escape_regex_metachars(self, delimiters, self.sql(delimiters))
    return _build_capitalization_sql(self, value_sql, escaped_sql)
1161
1162
def _boolxor_agg_sql(self: DuckDBGenerator, expression: exp.BoolxorAgg) -> str:
    """
    Emulate Snowflake's BOOLXOR_AGG, which is TRUE iff exactly one input is TRUE.

    DuckDB lacks an equivalent aggregate, so this generates COUNT_IF(col) = 1.
    COUNT_IF strictly requires boolean input, hence the defensive cast.
    """
    count_if = exp.CountIf(this=_cast_to_boolean(expression.this))
    return self.sql(exp.EQ(this=count_if, expression=exp.Literal.number(1)))
1176
1177
def _bitshift_sql(
    self: DuckDBGenerator, expression: exp.BitwiseLeftShift | exp.BitwiseRightShift
) -> str:
    """
    Render bit shifts, injecting BIT/INT128 casts where DuckDB needs them.

    DuckDB's shift operators don't work on BLOB/BINARY types, so those operands
    are shifted as BIT and the result cast back to BLOB.

    Note: Assumes type annotation has been applied with the source dialect.
    """
    op = "<<" if isinstance(expression, exp.BitwiseLeftShift) else ">>"
    this = expression.this
    cast_back_to_blob = _is_binary(this)

    if cast_back_to_blob:
        expression.set("this", exp.cast(this, exp.DType.BIT))
    elif expression.args.get("requires_int128"):
        this.replace(exp.cast(this, exp.DType.INT128))

    sql = self.binary(expression, op)

    # DuckDB parses `a << b | c << d` as `(a << b | c) << d`, so parenthesize
    # whenever we're nested under another binary operator
    if isinstance(expression.parent, exp.Binary):
        sql = self.sql(exp.Paren(this=sql))

    if cast_back_to_blob:
        blob_type = exp.DataType.from_str("BLOB", dialect="duckdb")
        sql = self.sql(exp.Cast(this=sql, to=blob_type))

    return sql
1212
1213
def _scale_rounding_sql(
    self: DuckDBGenerator,
    expression: exp.Expr,
    rounding_func: type[exp.Expr],
) -> str | None:
    """
    Emulate the scale parameter of rounding functions that DuckDB lacks.

    DuckDB doesn't support a scale argument for e.g. FLOOR/CEIL, so
    FUNC(x, n) is transformed into ROUND(FUNC(x * 10^n) / 10^n, n).

    Args:
        self: The DuckDB generator instance
        expression: The expression to transform (must have 'this', 'decimals', and 'to' args)
        rounding_func: The rounding function class to use in the transformation

    Returns:
        The transformed SQL string if a decimals parameter exists, None otherwise
    """
    decimals = expression.args.get("decimals")
    if decimals is None or expression.args.get("to") is not None:
        return None

    target = expression.this
    if isinstance(target, exp.Binary):
        # Parenthesize so the multiplication below binds correctly
        target = exp.Paren(this=target)

    scale: exp.Expr = decimals
    if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
        scale = exp.cast(decimals, exp.DType.INT)

    factor = exp.Pow(this=exp.Literal.number("10"), expression=scale)
    scaled = rounding_func(this=exp.Mul(this=target, expression=factor))
    unscaled = exp.Div(this=scaled, expression=factor.copy())

    return self.round_sql(
        exp.Round(this=unscaled, decimals=decimals, casts_non_integer_decimals=True)
    )
1253
1254
def _ceil_floor(self: DuckDBGenerator, expression: exp.Floor | exp.Ceil) -> str:
    """Render CEIL/FLOOR, emulating the scale argument; fall back to the default otherwise."""
    scaled = _scale_rounding_sql(self, expression, type(expression))
    return scaled if scaled is not None else self.ceil_floor(expression)
1260
1261
def _regr_val_sql(
    self: DuckDBGenerator,
    expression: exp.RegrValx | exp.RegrValy,
) -> str:
    """
    Transpile Snowflake's REGR_VALX/REGR_VALY to a DuckDB IF expression.

    REGR_VALX(y, x) returns NULL if y is NULL; otherwise returns x.
    REGR_VALY(y, x) returns NULL if x is NULL; otherwise returns y.
    """
    from sqlglot.optimizer.annotate_types import annotate_types

    # REGR_VALX null-checks `this` (y) and returns `expression` (x);
    # REGR_VALY is the mirror image
    if isinstance(expression, exp.RegrValx):
        null_check, returned, returned_attr = expression.this, expression.expression, "expression"
    else:
        null_check, returned, returned_attr = expression.expression, expression.this, "this"

    result_type = returned.type

    # Infer the return type when annotation is missing, so the NULL branch can be typed
    if not result_type or result_type.this == exp.DType.UNKNOWN:
        try:
            annotated = annotate_types(expression.copy(), dialect=self.dialect)
            result_type = getattr(annotated, returned_attr).type
        except Exception:
            # Best-effort inference; fall through to the DOUBLE default below
            pass

    # Default to DOUBLE for regression functions if the type is still unknown
    if not result_type or result_type.this == exp.DType.UNKNOWN:
        result_type = exp.DType.DOUBLE.into_expr()

    # Cast NULL to the return value's type to avoid DuckDB type inference issues
    typed_null = exp.Cast(this=exp.Null(), to=result_type)

    return self.sql(
        exp.If(
            this=exp.Is(this=null_check.copy(), expression=exp.Null()),
            true=typed_null,
            false=returned.copy(),
        )
    )
1314
1315
def _maybe_corr_null_to_false(
    expression: exp.Filter | exp.Window | exp.Corr,
) -> exp.Filter | exp.Window | exp.Corr | None:
    """Unwrap Filter/Window layers to find a CORR node and clear its null_on_zero_variance flag."""
    node = expression
    while isinstance(node, (exp.Window, exp.Filter)):
        node = node.this

    if isinstance(node, exp.Corr) and node.args.get("null_on_zero_variance"):
        node.set("null_on_zero_variance", False)
        return expression

    return None
1328
1329
def _date_from_parts_sql(self: DuckDBGenerator, expression: exp.DateFromParts) -> str:
    """
    Snowflake's DATE_FROM_PARTS allows out-of-range values for the month and day input.
    E.g., larger values (month=13, day=100), zero-values (month=0, day=0), negative values (month=-13, day=-100).

    DuckDB's MAKE_DATE does not support out-of-range values, but DuckDB's INTERVAL type does.

    We convert to date arithmetic:
    DATE_FROM_PARTS(year, month, day)
    - MAKE_DATE(year, 1, 1) + INTERVAL (month-1) MONTH + INTERVAL (day-1) DAY
    """
    year_expr = expression.args.get("year")
    month_expr = expression.args.get("month")
    day_expr = expression.args.get("day")

    if expression.args.get("allow_overflow"):
        # Anchor at Jan 1st of the year, then add the (possibly out-of-range)
        # month/day offsets as intervals
        base_date: exp.Expr = exp.func(
            "MAKE_DATE", year_expr, exp.Literal.number(1), exp.Literal.number(1)
        )

        if month_expr:
            base_date = base_date + exp.Interval(this=month_expr - 1, unit=exp.var("MONTH"))

        if day_expr:
            base_date = base_date + exp.Interval(this=day_expr - 1, unit=exp.var("DAY"))

        return self.sql(exp.cast(expression=base_date, to=exp.DType.DATE))

    return self.func("MAKE_DATE", year_expr, month_expr, day_expr)
1359
1360
def _round_arg(arg: exp.Expr, round_input: bool | None = None) -> exp.Expr:
    """Optionally wrap `arg` in ROUND(arg, 0) when round-input semantics are requested."""
    return exp.func("ROUND", arg, exp.Literal.number(0)) if round_input else arg
1365
1366
def _boolnot_sql(self: DuckDBGenerator, expression: exp.Boolnot) -> str:
    """Render BOOLNOT as NOT (arg), rounding the input first when requested."""
    operand = _round_arg(expression.this, expression.args.get("round_input"))
    return self.sql(exp.not_(exp.paren(operand)))
1370
1371
def _booland_sql(self: DuckDBGenerator, expression: exp.Booland) -> str:
    """Render BOOLAND as ((lhs) AND (rhs)), rounding the inputs when requested."""
    rounding = expression.args.get("round_input")
    lhs = exp.paren(_round_arg(expression.this, rounding))
    rhs = exp.paren(_round_arg(expression.expression, rounding))
    return self.sql(exp.paren(exp.and_(lhs, rhs, wrap=False)))
1377
1378
def _boolor_sql(self: DuckDBGenerator, expression: exp.Boolor) -> str:
    """Render BOOLOR as ((lhs) OR (rhs)), rounding the inputs when requested."""
    rounding = expression.args.get("round_input")
    lhs = exp.paren(_round_arg(expression.this, rounding))
    rhs = exp.paren(_round_arg(expression.expression, rounding))
    return self.sql(exp.paren(exp.or_(lhs, rhs, wrap=False)))
1384
1385
def _xor_sql(self: DuckDBGenerator, expression: exp.Xor) -> str:
    """Render XOR as (l AND (NOT r)) OR ((NOT l) AND r), rounding the inputs when requested."""
    rounding = expression.args.get("round_input")
    lhs = _round_arg(expression.this, rounding)
    rhs = _round_arg(expression.expression, rounding)
    only_left = exp.paren(exp.and_(lhs.copy(), exp.paren(rhs.not_()), wrap=False))
    only_right = exp.paren(exp.and_(exp.paren(lhs.not_()), rhs.copy(), wrap=False))
    return self.sql(exp.or_(only_left, only_right, wrap=False))
1397
1398
def _explode_to_unnest_sql(self: DuckDBGenerator, expression: exp.Lateral) -> str:
    """Handle LATERAL VIEW EXPLODE/INLINE conversion to UNNEST for DuckDB."""
    inner = expression.this

    if not isinstance(inner, exp.Inline):
        # EXPLODE and the remaining cases go through the standard conversion
        return explode_to_unnest_sql(self, expression)

    # INLINE becomes CROSS JOIN LATERAL (SELECT UNNEST(..., max_depth => 2)),
    # using DuckDB's named-parameter syntax for max_depth
    unnest = exp.Unnest(
        expressions=[
            inner.this,
            exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)),
        ]
    )
    subquery = exp.Select(expressions=[unnest]).subquery()

    alias = expression.args.get("alias")
    if alias and not alias.this:
        # A table name is required here; synthesize one from the lateral's index
        alias.set("this", exp.to_identifier(f"_u_{expression.index}"))

    lateral = exp.Lateral(this=subquery, alias=alias)
    return self.sql(exp.Join(this=lateral, kind="CROSS"))
1426
1427
def _sha_sql(
    self: DuckDBGenerator,
    expression: exp.Expr,
    hash_func: str,
    is_binary: bool = False,
) -> str:
    """
    Render SHA-family hashing, casting incompatible inputs to VARCHAR and
    optionally UNHEXing the hex digest when binary output is requested.
    """
    arg = expression.this

    # Among the SHA2 family, DuckDB only implements SHA256; warn on other digest lengths
    if hash_func == "SHA256" and (expression.text("length") or "256") != "256":
        self.unsupported("DuckDB only supports SHA256 hashing algorithm.")

    # Known non-text, non-binary argument types need an explicit VARCHAR cast
    has_known_type = arg.type and arg.type.this != exp.DType.UNKNOWN
    if has_known_type and not arg.is_type(*exp.DataType.TEXT_TYPES) and not _is_binary(arg):
        arg = exp.cast(arg, exp.DType.VARCHAR)

    hashed = self.func(hash_func, arg)
    return self.func("UNHEX", hashed) if is_binary else hashed
1453
1454
1455class DuckDBGenerator(generator.Generator):
1456    PARAMETER_TOKEN = "$"
1457    NAMED_PLACEHOLDER_TOKEN = "$"
1458    JOIN_HINTS = False
1459    TABLE_HINTS = False
1460    QUERY_HINTS = False
1461    LIMIT_FETCH = "LIMIT"
1462    STRUCT_DELIMITER = ("(", ")")
1463    RENAME_TABLE_WITH_DB = False
1464    NVL2_SUPPORTED = False
1465    SEMI_ANTI_JOIN_WITH_SIDE = False
1466    TABLESAMPLE_KEYWORDS = "USING SAMPLE"
1467    TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
1468    LAST_DAY_SUPPORTS_DATE_PART = False
1469    JSON_KEY_VALUE_PAIR_SEP = ","
1470    IGNORE_NULLS_IN_FUNC = True
1471    IGNORE_NULLS_BEFORE_ORDER = False
1472    JSON_PATH_BRACKETED_KEY_SUPPORTED = False
1473    SUPPORTS_CREATE_TABLE_LIKE = False
1474    MULTI_ARG_DISTINCT = False
1475    CAN_IMPLEMENT_ARRAY_ANY = True
1476    SUPPORTS_TO_NUMBER = False
1477    SELECT_KINDS: tuple[str, ...] = ()
1478    SUPPORTS_DECODE_CASE = False
1479    SUPPORTS_DROP_ALTER_ICEBERG_PROPERTY = False
1480
1481    AFTER_HAVING_MODIFIER_TRANSFORMS = generator.AFTER_HAVING_MODIFIER_TRANSFORMS
1482    SUPPORTS_WINDOW_EXCLUDE = True
1483    COPY_HAS_INTO_KEYWORD = False
1484    STAR_EXCEPT = "EXCLUDE"
1485    PAD_FILL_PATTERN_IS_REQUIRED = True
1486    ARRAY_SIZE_DIM_REQUIRED: bool | None = False
1487    NORMALIZE_EXTRACT_DATE_PARTS = True
1488    SUPPORTS_LIKE_QUANTIFIERS = False
1489    SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True
1490
1491    TRANSFORMS = {
1492        **generator.Generator.TRANSFORMS,
1493        exp.AnyValue: _anyvalue_sql,
1494        exp.ApproxDistinct: approx_count_distinct_sql,
1495        exp.Boolnot: _boolnot_sql,
1496        exp.Booland: _booland_sql,
1497        exp.Boolor: _boolor_sql,
1498        exp.Array: transforms.preprocess(
1499            [transforms.inherit_struct_field_names],
1500            generator=inline_array_unless_query,
1501        ),
1502        exp.ArrayAppend: array_append_sql("LIST_APPEND"),
1503        exp.ArrayCompact: array_compact_sql,
1504        exp.ArrayConstructCompact: lambda self, e: self.sql(
1505            exp.ArrayCompact(this=exp.Array(expressions=e.expressions))
1506        ),
1507        exp.ArrayConcat: array_concat_sql("LIST_CONCAT"),
1508        exp.ArrayContains: _array_contains_sql,
1509        exp.ArrayOverlaps: _array_overlaps_sql,
1510        exp.ArrayFilter: rename_func("LIST_FILTER"),
1511        exp.ArrayInsert: _array_insert_sql,
1512        exp.ArrayPosition: lambda self, e: (
1513            self.sql(
1514                exp.Sub(
1515                    this=exp.ArrayPosition(this=e.this, expression=e.expression),
1516                    expression=exp.Literal.number(1),
1517                )
1518            )
1519            if e.args.get("zero_based")
1520            else self.func("ARRAY_POSITION", e.this, e.expression)
1521        ),
1522        exp.ArrayRemoveAt: _array_remove_at_sql,
1523        exp.ArrayRemove: remove_from_array_using_filter,
1524        exp.ArraySort: _array_sort_sql,
1525        exp.ArrayPrepend: array_append_sql("LIST_PREPEND", swap_params=True),
1526        exp.ArraySum: rename_func("LIST_SUM"),
1527        exp.ArrayMax: rename_func("LIST_MAX"),
1528        exp.ArrayMin: rename_func("LIST_MIN"),
1529        exp.Base64DecodeBinary: lambda self, e: _base64_decode_sql(self, e, to_string=False),
1530        exp.Base64DecodeString: lambda self, e: _base64_decode_sql(self, e, to_string=True),
1531        exp.BitwiseAnd: lambda self, e: self._bitwise_op(e, "&"),
1532        exp.BitwiseAndAgg: _bitwise_agg_sql,
1533        exp.BitwiseCount: rename_func("BIT_COUNT"),
1534        exp.BitwiseLeftShift: _bitshift_sql,
1535        exp.BitwiseOr: lambda self, e: self._bitwise_op(e, "|"),
1536        exp.BitwiseOrAgg: _bitwise_agg_sql,
1537        exp.BitwiseRightShift: _bitshift_sql,
1538        exp.BitwiseXorAgg: _bitwise_agg_sql,
1539        exp.CommentColumnConstraint: no_comment_column_constraint_sql,
1540        exp.Corr: lambda self, e: self._corr_sql(e),
1541        exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"),
1542        exp.CurrentTime: lambda *_: "CURRENT_TIME",
1543        exp.CurrentSchemas: lambda self, e: self.func(
1544            "current_schemas", e.this if e.this else exp.true()
1545        ),
1546        exp.CurrentTimestamp: lambda self, e: (
1547            self.sql(
1548                exp.AtTimeZone(this=exp.var("CURRENT_TIMESTAMP"), zone=exp.Literal.string("UTC"))
1549            )
1550            if e.args.get("sysdate")
1551            else "CURRENT_TIMESTAMP"
1552        ),
1553        exp.CurrentVersion: rename_func("version"),
1554        exp.Localtime: unsupported_args("this")(lambda *_: "LOCALTIME"),
1555        exp.DayOfMonth: rename_func("DAYOFMONTH"),
1556        exp.DayOfWeek: rename_func("DAYOFWEEK"),
1557        exp.DayOfWeekIso: rename_func("ISODOW"),
1558        exp.DayOfYear: rename_func("DAYOFYEAR"),
1559        exp.Dayname: lambda self, e: (
1560            self.func("STRFTIME", e.this, exp.Literal.string("%a"))
1561            if e.args.get("abbreviated")
1562            else self.func("DAYNAME", e.this)
1563        ),
1564        exp.Monthname: lambda self, e: (
1565            self.func("STRFTIME", e.this, exp.Literal.string("%b"))
1566            if e.args.get("abbreviated")
1567            else self.func("MONTHNAME", e.this)
1568        ),
1569        exp.DataType: _datatype_sql,
1570        exp.Date: _date_sql,
1571        exp.DateAdd: _date_delta_to_binary_interval_op(),
1572        exp.DateFromParts: _date_from_parts_sql,
1573        exp.DateSub: _date_delta_to_binary_interval_op(),
1574        exp.DateDiff: _date_diff_sql,
1575        exp.DateStrToDate: datestrtodate_sql,
1576        exp.Datetime: no_datetime_sql,
1577        exp.DatetimeDiff: _date_diff_sql,
1578        exp.DatetimeSub: _date_delta_to_binary_interval_op(),
1579        exp.DatetimeAdd: _date_delta_to_binary_interval_op(),
1580        exp.DateToDi: lambda self, e: (
1581            f"CAST(STRFTIME({self.sql(e, 'this')}, {self.dialect.DATEINT_FORMAT}) AS INT)"
1582        ),
1583        exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
1584        exp.HexDecodeString: lambda self, e: self.sql(exp.Decode(this=exp.Unhex(this=e.this))),
1585        exp.DiToDate: lambda self, e: (
1586            f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {self.dialect.DATEINT_FORMAT}) AS DATE)"
1587        ),
1588        exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
1589        exp.EqualNull: lambda self, e: self.sql(
1590            exp.NullSafeEQ(this=e.this, expression=e.expression)
1591        ),
1592        exp.EuclideanDistance: rename_func("LIST_DISTANCE"),
1593        exp.GenerateDateArray: _generate_datetime_array_sql,
1594        exp.GenerateSeries: generate_series_sql("GENERATE_SERIES", "RANGE"),
1595        exp.GenerateTimestampArray: _generate_datetime_array_sql,
1596        exp.Getbit: getbit_sql,
1597        exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
1598        exp.Explode: rename_func("UNNEST"),
1599        exp.IcebergProperty: lambda *_: "",
1600        exp.IntDiv: lambda self, e: self.binary(e, "//"),
1601        exp.IsInf: rename_func("ISINF"),
1602        exp.IsNan: rename_func("ISNAN"),
1603        exp.IsNullValue: lambda self, e: self.sql(
1604            exp.func("JSON_TYPE", e.this).eq(exp.Literal.string("NULL"))
1605        ),
1606        exp.IsArray: lambda self, e: self.sql(
1607            exp.func("JSON_TYPE", e.this).eq(exp.Literal.string("ARRAY"))
1608        ),
1609        exp.Ceil: _ceil_floor,
1610        exp.Floor: _ceil_floor,
1611        exp.JSONBExists: rename_func("JSON_EXISTS"),
1612        exp.JSONExtract: _arrow_json_extract_sql,
1613        exp.JSONExtractArray: _json_extract_value_array_sql,
1614        exp.JSONFormat: _json_format_sql,
1615        exp.JSONValueArray: _json_extract_value_array_sql,
1616        exp.Lateral: _explode_to_unnest_sql,
1617        exp.LogicalOr: lambda self, e: self.func("BOOL_OR", _cast_to_boolean(e.this)),
1618        exp.LogicalAnd: lambda self, e: self.func("BOOL_AND", _cast_to_boolean(e.this)),
1619        exp.Select: transforms.preprocess([_seq_to_range_in_generator]),
1620        exp.Seq1: lambda self, e: _seq_sql(self, e, 1),
1621        exp.Seq2: lambda self, e: _seq_sql(self, e, 2),
1622        exp.Seq4: lambda self, e: _seq_sql(self, e, 4),
1623        exp.Seq8: lambda self, e: _seq_sql(self, e, 8),
1624        exp.BoolxorAgg: _boolxor_agg_sql,
1625        exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
1626        exp.Initcap: _initcap_sql,
1627        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
1628        exp.SHA: lambda self, e: _sha_sql(self, e, "SHA1"),
1629        exp.SHA1Digest: lambda self, e: _sha_sql(self, e, "SHA1", is_binary=True),
1630        exp.SHA2: lambda self, e: _sha_sql(self, e, "SHA256"),
1631        exp.SHA2Digest: lambda self, e: _sha_sql(self, e, "SHA256", is_binary=True),
1632        exp.MonthsBetween: months_between_sql,
1633        exp.NextDay: _day_navigation_sql,
1634        exp.PercentileCont: rename_func("QUANTILE_CONT"),
1635        exp.PercentileDisc: rename_func("QUANTILE_DISC"),
1636        # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
1637        # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
1638        exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
1639        exp.PreviousDay: _day_navigation_sql,
1640        exp.RegexpILike: lambda self, e: self.func(
1641            "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
1642        ),
1643        exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
1644        exp.RegrValx: _regr_val_sql,
1645        exp.RegrValy: _regr_val_sql,
1646        exp.Return: lambda self, e: self.sql(e, "this"),
1647        exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
1648        exp.StrToUnix: lambda self, e: self.func(
1649            "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
1650        ),
1651        exp.Struct: _struct_sql,
1652        exp.Transform: rename_func("LIST_TRANSFORM"),
1653        exp.TimeAdd: _date_delta_to_binary_interval_op(),
1654        exp.TimeSub: _date_delta_to_binary_interval_op(),
1655        exp.Time: no_time_sql,
1656        exp.TimeDiff: _timediff_sql,
1657        exp.Timestamp: no_timestamp_sql,
1658        exp.TimestampAdd: _date_delta_to_binary_interval_op(),
1659        exp.TimestampDiff: lambda self, e: self.func(
1660            "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
1661        ),
1662        exp.TimestampSub: _date_delta_to_binary_interval_op(),
1663        exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DType.DATE)),
1664        exp.TimeStrToTime: timestrtotime_sql,
1665        exp.TimeStrToUnix: lambda self, e: self.func(
1666            "EPOCH", exp.cast(e.this, exp.DType.TIMESTAMP)
1667        ),
1668        exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
1669        exp.ToBoolean: _to_boolean_sql,
1670        exp.ToVariant: lambda self, e: self.sql(
1671            exp.cast(e.this, exp.DataType.from_str("VARIANT", dialect="duckdb"))
1672        ),
1673        exp.TimeToUnix: rename_func("EPOCH"),
1674        exp.TsOrDiToDi: lambda self, e: (
1675            f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)"
1676        ),
1677        exp.TsOrDsAdd: _date_delta_to_binary_interval_op(),
1678        exp.TsOrDsDiff: lambda self, e: self.func(
1679            "DATE_DIFF",
1680            f"'{e.args.get('unit') or 'DAY'}'",
1681            exp.cast(e.expression, exp.DType.TIMESTAMP),
1682            exp.cast(e.this, exp.DType.TIMESTAMP),
1683        ),
1684        exp.UnixMicros: lambda self, e: self.func("EPOCH_US", _implicit_datetime_cast(e.this)),
1685        exp.UnixMillis: lambda self, e: self.func("EPOCH_MS", _implicit_datetime_cast(e.this)),
1686        exp.UnixSeconds: lambda self, e: self.sql(
1687            exp.cast(self.func("EPOCH", _implicit_datetime_cast(e.this)), exp.DType.BIGINT)
1688        ),
1689        exp.UnixToStr: lambda self, e: self.func(
1690            "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
1691        ),
1692        exp.DatetimeTrunc: lambda self, e: self.func(
1693            "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DType.DATETIME)
1694        ),
1695        exp.UnixToTime: _unix_to_time_sql,
1696        exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
1697        exp.VariancePop: rename_func("VAR_POP"),
1698        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
1699        exp.YearOfWeek: lambda self, e: self.sql(
1700            exp.Extract(
1701                this=exp.Var(this="ISOYEAR"),
1702                expression=e.this,
1703            )
1704        ),
1705        exp.YearOfWeekIso: lambda self, e: self.sql(
1706            exp.Extract(
1707                this=exp.Var(this="ISOYEAR"),
1708                expression=e.this,
1709            )
1710        ),
1711        exp.Xor: _xor_sql,
1712        exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
1713        exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
1714        exp.DateBin: rename_func("TIME_BUCKET"),
1715        exp.LastDay: _last_day_sql,
1716    }
1717
    # JSONPath expression node types this generator can render for DuckDB.
    # Path parts outside this set (e.g. filters, recursive descent) are not
    # representable here — presumably handled as unsupported by the base
    # generator's JSON path machinery; confirm against generator.Generator.
    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }
1724
    # DuckDB type-name overrides layered on the base generator's mapping:
    # character types collapse to TEXT, binary types to BLOB, and decimal-like
    # types with no native DuckDB counterpart (DECFLOAT, BIGDECIMAL) render as
    # DECIMAL.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DType.BINARY: "BLOB",
        exp.DType.BPCHAR: "TEXT",
        exp.DType.CHAR: "TEXT",
        exp.DType.DATETIME: "TIMESTAMP",
        exp.DType.DECFLOAT: "DECIMAL",
        exp.DType.FLOAT: "REAL",
        exp.DType.JSONB: "JSON",
        exp.DType.NCHAR: "TEXT",
        exp.DType.NVARCHAR: "TEXT",
        exp.DType.UINT: "UINTEGER",
        exp.DType.VARBINARY: "BLOB",
        exp.DType.ROWVERSION: "BLOB",
        exp.DType.VARCHAR: "TEXT",
        exp.DType.TIMESTAMPLTZ: "TIMESTAMPTZ",
        exp.DType.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DType.TIMESTAMP_S: "TIMESTAMP_S",
        exp.DType.TIMESTAMP_MS: "TIMESTAMP_MS",
        exp.DType.TIMESTAMP_NS: "TIMESTAMP_NS",
        exp.DType.BIGDECIMAL: "DECIMAL",
    }

    # Parameter tuples for the decimal variants rendered as DECIMAL above.
    # NOTE(review): presumably ((default precision, default scale),
    # (max precision, max scale)) pairs — confirm against the base
    # Generator's TYPE_PARAM_SETTINGS semantics.
    TYPE_PARAM_SETTINGS = {
        **generator.Generator.TYPE_PARAM_SETTINGS,
        exp.DType.BIGDECIMAL: ((38, 5), (38, 38)),
        exp.DType.DECFLOAT: ((38, 5), (38, 38)),
    }
1753
    # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
    # NOTE: entries with a "_p" suffix (e.g. "in_p", "end_p") mirror the
    # libpg_query token names verbatim from the list linked above.
    RESERVED_KEYWORDS = {
        "array",
        "analyse",
        "union",
        "all",
        "when",
        "in_p",
        "default",
        "create_p",
        "window",
        "asymmetric",
        "to",
        "else",
        "localtime",
        "from",
        "end_p",
        "select",
        "current_date",
        "foreign",
        "with",
        "grant",
        "session_user",
        "or",
        "except",
        "references",
        "fetch",
        "limit",
        "group_p",
        "leading",
        "into",
        "collate",
        "offset",
        "do",
        "then",
        "localtimestamp",
        "check_p",
        "lateral_p",
        "current_role",
        "where",
        "asc_p",
        "placing",
        "desc_p",
        "user",
        "unique",
        "initially",
        "column",
        "both",
        "some",
        "as",
        "any",
        "only",
        "deferrable",
        "null_p",
        "current_time",
        "true_p",
        "table",
        "case",
        "trailing",
        "variadic",
        "for",
        "on",
        "distinct",
        "false_p",
        "not",
        "constraint",
        "current_timestamp",
        "returning",
        "primary",
        "intersect",
        "having",
        "analyze",
        "current_user",
        "and",
        "cast",
        "symmetric",
        "using",
        "order",
        "current_catalog",
    }

    # Interval values of these node kinds may be emitted without extra
    # wrapping — NOTE(review): semantics inherited from the base generator's
    # UNWRAPPED_INTERVAL_VALUES flag; confirm there.
    UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)
1836
    # DuckDB doesn't generally support CREATE TABLE .. properties
    # https://duckdb.org/docs/sql/statements/create_table.html
    # There are a few exceptions (e.g. temporary tables) which are supported or
    # can be transpiled to DuckDB, so we explicitly override them accordingly
    PROPERTIES_LOCATION = {
        # First mark every property known to the base generator as unsupported...
        **{
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        },
        # ...then re-enable the handful that DuckDB can express
        exp.LikeProperty: exp.Properties.Location.POST_SCHEMA,
        exp.TemporaryProperty: exp.Properties.Location.POST_CREATE,
        exp.ReturnsProperty: exp.Properties.Location.POST_ALIAS,
        exp.SequenceProperties: exp.Properties.Location.POST_EXPRESSION,
        exp.IcebergProperty: exp.Properties.Location.POST_CREATE,
    }

    # Exposes the module-level set (defined earlier in this file) of window
    # functions whose IGNORE/RESPECT NULLS modifier is handled, as a class var.
    IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS: t.ClassVar = _IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS
1854
    # Template for ZIPF transpilation - placeholders get replaced with actual parameters
    # Inverse-CDF sampling: compute weights 1/i^:s for i in [1, :n], normalize
    # their running sum into a CDF, then return the smallest i whose cumulative
    # probability reaches the uniform draw :random_expr.
    ZIPF_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        WITH rand AS (SELECT :random_expr AS r),
        weights AS (
            SELECT i, 1.0 / POWER(i, :s) AS w
            FROM RANGE(1, :n + 1) AS t(i)
        ),
        cdf AS (
            SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p
            FROM weights
        )
        SELECT MIN(i)
        FROM cdf
        WHERE p >= (SELECT r FROM rand)
        """
    )

    # Template for NORMAL transpilation using Box-Muller transform
    # mean + (stddev * sqrt(-2 * ln(u1)) * cos(2 * pi * u2))
    # GREATEST(:u1, 1e-10) guards against LN(0) when the uniform draw is zero.
    NORMAL_TEMPLATE: exp.Expr = exp.maybe_parse(
        ":mean + (:stddev * SQRT(-2 * LN(GREATEST(:u1, 1e-10))) * COS(2 * PI() * :u2))"
    )

    # Template for generating a seeded pseudo-random value in [0, 1) from a hash
    SEEDED_RANDOM_TEMPLATE: exp.Expr = exp.maybe_parse("(ABS(HASH(:seed)) % 1000000) / 1000000.0")

    # Template for generating signed and unsigned SEQ values within a specified range
    # (both templates are defined earlier in this module)
    SEQ_UNSIGNED: exp.Expr = _SEQ_UNSIGNED
    SEQ_SIGNED: exp.Expr = _SEQ_SIGNED

    # Template for MAP_CAT transpilation - Snowflake semantics:
    # 1. Returns NULL if either input is NULL
    # 2. For duplicate keys, prefers non-NULL value (COALESCE(m2[k], m1[k]))
    # 3. Filters out entries with NULL values from the result
    MAPCAT_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :map1 IS NULL OR :map2 IS NULL THEN NULL
            ELSE MAP_FROM_ENTRIES(LIST_FILTER(LIST_TRANSFORM(
                LIST_DISTINCT(LIST_CONCAT(MAP_KEYS(:map1), MAP_KEYS(:map2))),
                __k -> STRUCT_PACK(key := __k, value := COALESCE(:map2[__k], :map1[__k]))
            ), __x -> __x.value IS NOT NULL))
        END
        """
    )

    # Mappings for EXTRACT/DATE_PART transpilation
    # Maps Snowflake specifiers unsupported in DuckDB to strftime format codes
    # NOTE(review): the second tuple element is presumably the SQL type the
    # formatted string is cast to — confirm at the consuming call site.
    EXTRACT_STRFTIME_MAPPINGS: dict[str, tuple[str, str]] = {
        "WEEKISO": ("%V", "INTEGER"),
        "YEAROFWEEK": ("%G", "INTEGER"),
        "YEAROFWEEKISO": ("%G", "INTEGER"),
        "NANOSECOND": ("%n", "BIGINT"),
    }

    # Maps epoch-based specifiers to DuckDB epoch functions
    EXTRACT_EPOCH_MAPPINGS: dict[str, str] = {
        "EPOCH_SECOND": "EPOCH",
        "EPOCH_MILLISECOND": "EPOCH_MS",
        "EPOCH_MICROSECOND": "EPOCH_US",
        "EPOCH_NANOSECOND": "EPOCH_NS",
    }
1918
    # Template for BITMAP_CONSTRUCT_AGG transpilation
    #
    # BACKGROUND:
    # Snowflake's BITMAP_CONSTRUCT_AGG aggregates integers into a compact binary bitmap.
    # Supports values in range 0-32767, this version returns NULL if any value is out of range
    # See: https://docs.snowflake.com/en/sql-reference/functions/bitmap_construct_agg
    # See: https://docs.snowflake.com/en/user-guide/querying-bitmaps-for-distinct-counts
    #
    # Snowflake uses two different formats based on the number of unique values:
    #
    # Format 1 - Small bitmap (< 5 unique values): Length of 10 bytes
    #   Bytes 0-1: Count of values as 2-byte big-endian integer (e.g., 3 values = 0x0003)
    #   Bytes 2-9: Up to 4 values, each as 2-byte little-endian integers, zero-padded to 8 bytes
    #   Example: Values [1, 2, 3] -> 0x0003 0100 0200 0300 0000 (hex)
    #                                count  v1   v2   v3   pad
    #
    # Format 2 - Large bitmap (>= 5 unique values): Length of 10 + (2 * count) bytes
    #   Bytes 0-9: Fixed header 0x08 followed by 9 zero bytes
    #   Bytes 10+: Each value as 2-byte little-endian integer (no padding)
    #   Example: Values [1,2,3,4,5] -> 0x08 00000000 00000000 00 0100 0200 0300 0400 0500
    #                                  hdr  ----9 zero bytes----  v1   v2   v3   v4   v5
    #
    # TEMPLATE STRUCTURE
    #
    # Phase 1 - Innermost subquery: Data preparation
    #   SELECT LIST_SORT(...) AS l
    #   - Aggregates all input values into a list, remove NULLs, duplicates and sorts
    #   Result: Clean, sorted list of unique non-null integers stored as 'l'
    #
    # Phase 2 - Middle subquery: Hex string construction
    #   LIST_TRANSFORM(...)
    #   - Converts each integer to 2-byte little-endian hex representation
    #   - & 255 extracts low byte, >> 8 extracts high byte
    #   - LIST_REDUCE: Concatenates all hex pairs into single string 'h'
    #   Result: Hex string of all values
    #
    # Phase 3 - Outer SELECT: Final bitmap assembly
    #   LENGTH(l) < 5:
    #   - Small format: 2-byte count (big-endian via %04X) + values + zero padding
    #     (each absent value is 2 zero bytes = 4 hex chars, hence the
    #     REPEAT('00', GREATEST(0, 4 - LENGTH(l)) * 2) padding term)
    #   LENGTH(l) >= 5:
    #   - Large format: Fixed 10-byte header + values (no padding needed)
    #   Result: Complete binary bitmap as BLOB
    #
    BITMAP_CONSTRUCT_AGG_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT CASE
            WHEN l IS NULL OR LENGTH(l) = 0 THEN NULL
            WHEN LENGTH(l) != LENGTH(LIST_FILTER(l, __v -> __v BETWEEN 0 AND 32767)) THEN NULL
            WHEN LENGTH(l) < 5 THEN UNHEX(PRINTF('%04X', LENGTH(l)) || h || REPEAT('00', GREATEST(0, 4 - LENGTH(l)) * 2))
            ELSE UNHEX('08000000000000000000' || h)
        END
        FROM (
            SELECT l, COALESCE(LIST_REDUCE(
                LIST_TRANSFORM(l, __x -> PRINTF('%02X%02X', CAST(__x AS INT) & 255, (CAST(__x AS INT) >> 8) & 255)),
                (__a, __b) -> __a || __b, ''
            ), '') AS h
            FROM (SELECT LIST_SORT(LIST_DISTINCT(LIST(:arg) FILTER(NOT :arg IS NULL))) AS l)
        )
        """
    )
1979
    # Template for RANDSTR transpilation - placeholders get replaced with actual parameters
    # One character is drawn per position: a hash-seeded uniform value in [0, 1)
    # indexes into the pool (62 = len(RANDSTR_CHAR_POOL)), and LISTAGG joins
    # the drawn characters into the final string.
    RANDSTR_TEMPLATE: exp.Expr = exp.maybe_parse(
        f"""
        SELECT LISTAGG(
            SUBSTRING(
                '{RANDSTR_CHAR_POOL}',
                1 + CAST(FLOOR(random_value * 62) AS INT),
                1
            ),
            ''
        )
        FROM (
            SELECT (ABS(HASH(i + :seed)) % 1000) / 1000.0 AS random_value
            FROM RANGE(:length) AS t(i)
        )
        """,
    )

    # Template for MINHASH transpilation
    # Computes k minimum hash values across aggregated data using DuckDB list functions
    # Returns JSON matching Snowflake format: {"state": [...], "type": "minhash", "version": 1}
    MINHASH_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT JSON_OBJECT('state', LIST(min_h ORDER BY seed), 'type', 'minhash', 'version', 1)
        FROM (
            SELECT seed, LIST_MIN(LIST_TRANSFORM(vals, __v -> HASH(CAST(__v AS VARCHAR) || CAST(seed AS VARCHAR)))) AS min_h
            FROM (SELECT LIST(:expr) AS vals), RANGE(0, :k) AS t(seed)
        )
        """,
    )

    # Template for MINHASH_COMBINE transpilation
    # Combines multiple minhash signatures by taking element-wise minimum
    # (signatures are unnested with ordinality so positions can be aligned)
    MINHASH_COMBINE_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT JSON_OBJECT('state', LIST(min_h ORDER BY idx), 'type', 'minhash', 'version', 1)
        FROM (
            SELECT
                pos AS idx,
                MIN(val) AS min_h
            FROM
                UNNEST(LIST(:expr)) AS _(sig),
                UNNEST(CAST(sig -> 'state' AS UBIGINT[])) WITH ORDINALITY AS t(val, pos)
            GROUP BY pos
        )
        """,
    )

    # Template for APPROXIMATE_SIMILARITY transpilation
    # Computes multi-way Jaccard similarity: fraction of positions where ALL signatures agree
    # (num_distinct = 1 at a position means every signature holds the same hash there)
    APPROXIMATE_SIMILARITY_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT CAST(SUM(CASE WHEN num_distinct = 1 THEN 1 ELSE 0 END) AS DOUBLE) / COUNT(*)
        FROM (
            SELECT pos, COUNT(DISTINCT h) AS num_distinct
            FROM (
                SELECT h, pos
                FROM UNNEST(LIST(:expr)) AS _(sig),
                     UNNEST(CAST(sig -> 'state' AS UBIGINT[])) WITH ORDINALITY AS s(h, pos)
            )
            GROUP BY pos
        )
        """,
    )

    # Template for ARRAYS_ZIP transpilation
    # Snowflake pads to longest array; DuckDB LIST_ZIP truncates to shortest
    # Uses RANGE + indexing to match Snowflake behavior
    ARRAYS_ZIP_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE WHEN :null_check THEN NULL
        WHEN :all_empty_check THEN [:empty_struct]
        ELSE LIST_TRANSFORM(RANGE(0, :max_len), __i -> :transform_struct)
        END
        """,
    )
2056
    # Template for UUID v5 construction: SHA1 of (namespace bytes || name),
    # formatted as 8-4-4-4-12 hex groups. The literal '5' fixes the version
    # nibble; `& 63 | 128` clears the top two bits of the variant byte and
    # sets 0b10xxxxxx, the RFC 4122 variant.
    UUID_V5_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        (SELECT
            LOWER(
                SUBSTR(h, 1, 8) || '-' ||
                SUBSTR(h, 9, 4) || '-' ||
                '5' || SUBSTR(h, 14, 3) || '-' ||
                FORMAT('{:02x}', CAST('0x' || SUBSTR(h, 17, 2) AS INT) & 63 | 128) || SUBSTR(h, 19, 2) || '-' ||
                SUBSTR(h, 21, 12)
            )
        FROM (
            SELECT SUBSTR(SHA1(UNHEX(REPLACE(:namespace, '-', '')) || ENCODE(:name, 'utf8')), 1, 32) AS h
        ))
        """
    )

    # Shared bag semantics outer frame for ARRAY_EXCEPT and ARRAY_INTERSECTION.
    # Each element is paired with its 1-based position via LIST_ZIP, then filtered
    # by a comparison operator (supplied via :cond) that determines the operation:
    #   EXCEPT (>):        keep the N-th occurrence only if N > count in arr2
    #                      e.g. [2,2,2] EXCEPT [2,2] -> [2]
    #   INTERSECTION (<=): keep the N-th occurrence only if N <= count in arr2
    #                      e.g. [2,2,2] INTERSECT [2,2] -> [2,2]
    # IS NOT DISTINCT FROM is used for NULL-safe element comparison.
    ARRAY_BAG_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :arr1 IS NULL OR :arr2 IS NULL THEN NULL
            ELSE LIST_TRANSFORM(
                LIST_FILTER(
                    LIST_ZIP(:arr1, GENERATE_SERIES(1, LEN(:arr1))),
                    pair -> :cond
                ),
                pair -> pair[0]
            )
        END
        """
    )

    # Condition plugged into :cond above for EXCEPT semantics (see frame docs)
    ARRAY_EXCEPT_CONDITION: exp.Expr = exp.maybe_parse(
        "LEN(LIST_FILTER(:arr1[1:pair[1]], e -> e IS NOT DISTINCT FROM pair[0]))"
        " > LEN(LIST_FILTER(:arr2, e -> e IS NOT DISTINCT FROM pair[0]))"
    )

    # Condition plugged into :cond above for INTERSECTION semantics
    ARRAY_INTERSECTION_CONDITION: exp.Expr = exp.maybe_parse(
        "LEN(LIST_FILTER(:arr1[1:pair[1]], e -> e IS NOT DISTINCT FROM pair[0]))"
        " <= LEN(LIST_FILTER(:arr2, e -> e IS NOT DISTINCT FROM pair[0]))"
    )

    # Set semantics for ARRAY_EXCEPT. Deduplicates arr1 via LIST_DISTINCT, then
    # filters out any element that appears at least once in arr2.
    #   e.g. [1,1,2,3] EXCEPT [1] -> [2,3]
    # IS NOT DISTINCT FROM is used for NULL-safe element comparison.
    ARRAY_EXCEPT_SET_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :arr1 IS NULL OR :arr2 IS NULL THEN NULL
            ELSE LIST_FILTER(
                LIST_DISTINCT(:arr1),
                e -> LEN(LIST_FILTER(:arr2, x -> x IS NOT DISTINCT FROM e)) = 0
            )
        END
        """
    )

    # Splits :string on a character class built from :escaped delimiter chars,
    # dropping empty tokens; an empty delimiter yields an unmatchable pattern
    # ('.^') so the whole input survives as a single token.
    STRTOK_TO_ARRAY_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE WHEN :delimiter IS NULL THEN NULL
        ELSE LIST_FILTER(
            REGEXP_SPLIT_TO_ARRAY(:string, CASE WHEN :delimiter = '' THEN '.^' ELSE CONCAT('[', :escaped, ']') END),
            x -> NOT x = ''
        ) END
        """
    )
2131
    # Template for STRTOK function transpilation
    #
    # DuckDB itself doesn't have a strtok function. This handles the transpilation from Snowflake to DuckDB.
    # We may need to adjust this if we want to support transpilation from other dialects
    #
    # CASE
    #     -- Snowflake: empty delimiter + empty input string -> NULL
    #     WHEN delimiter = '' AND input_str = '' THEN NULL
    #
    #     -- Snowflake: empty delimiter + non-empty input string -> treats whole input as 1 token -> return input string if index is 1
    #     WHEN delimiter = '' AND index = 1 THEN input_str
    #
    #     -- Snowflake: empty delimiter + non-empty input string -> treats whole input as 1 token -> return NULL if index is not 1
    #     WHEN delimiter = '' THEN NULL
    #
    #     -- Snowflake: negative indices return NULL
    #     WHEN index < 0 THEN NULL
    #
    #     -- Snowflake: return NULL if any argument is NULL
    #     WHEN input_str IS NULL OR delimiter IS NULL OR index IS NULL THEN NULL
    #
    #
    #     ELSE LIST_FILTER(
    #         REGEXP_SPLIT_TO_ARRAY(
    #             input_str,
    #             CASE
    #                 -- if delimiter is '', we don't want to surround it with '[' and ']' as '[]' is invalid for DuckDB
    #                 WHEN delimiter = '' THEN ''
    #
    #                 -- handle problematic regex characters in delimiter with REGEXP_REPLACE
    #                 -- turn delimiter into a regex char set, otherwise DuckDB will match in order, which we don't want
    #                 ELSE '[' || REGEXP_REPLACE(delimiter, problematic_char_set, '\\\1', 'g') || ']'
    #             END
    #         ),
    #
    #         -- Snowflake: don't return empty strings
    #         x -> NOT x = ''
    #     )[index]
    # END
    #
    # NOTE: the branch order in the template below implements the precedence
    # described above — the empty-delimiter checks are evaluated before the
    # generic IS NULL check.
    STRTOK_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :delimiter = '' AND :string = '' THEN NULL
            WHEN :delimiter = '' AND :part_index = 1 THEN :string
            WHEN :delimiter = '' THEN NULL
            WHEN :part_index < 0 THEN NULL
            WHEN :string IS NULL OR :delimiter IS NULL OR :part_index IS NULL THEN NULL
            ELSE :base_func
        END
        """
    )
2183
2184    def _array_bag_sql(self, condition: exp.Expr, arr1: exp.Expr, arr2: exp.Expr) -> str:
2185        cond = exp.Paren(this=exp.replace_placeholders(condition, arr1=arr1, arr2=arr2))
2186        return self.sql(
2187            exp.replace_placeholders(self.ARRAY_BAG_TEMPLATE, arr1=arr1, arr2=arr2, cond=cond)
2188        )
2189
2190    def timeslice_sql(self, expression: exp.TimeSlice) -> str:
2191        """
2192        Transform Snowflake's TIME_SLICE to DuckDB's time_bucket.
2193
2194        Snowflake: TIME_SLICE(date_expr, slice_length, 'UNIT' [, 'START'|'END'])
2195        DuckDB:    time_bucket(INTERVAL 'slice_length' UNIT, date_expr)
2196
2197        For 'END' kind, add the interval to get the end of the slice.
2198        For DATE type with 'END', cast result back to DATE to preserve type.
2199        """
2200        date_expr = expression.this
2201        slice_length = expression.expression
2202        unit = expression.unit
2203        kind = expression.text("kind").upper()
2204
2205        # Create INTERVAL expression: INTERVAL 'N' UNIT
2206        interval_expr = exp.Interval(this=slice_length, unit=unit)
2207
2208        # Create base time_bucket expression
2209        time_bucket_expr = exp.func("time_bucket", interval_expr, date_expr)
2210
2211        # Check if we need the end of the slice (default is start)
2212        if not kind == "END":
2213            # For 'START', return time_bucket directly
2214            return self.sql(time_bucket_expr)
2215
2216        # For 'END', add the interval to get end of slice
2217        add_expr = exp.Add(this=time_bucket_expr, expression=interval_expr.copy())
2218
2219        # If input is DATE type, cast result back to DATE to preserve type
2220        # DuckDB converts DATE to TIMESTAMP when adding intervals
2221        if date_expr.is_type(exp.DType.DATE):
2222            return self.sql(exp.cast(add_expr, exp.DType.DATE))
2223
2224        return self.sql(add_expr)
2225
2226    def bitmapbucketnumber_sql(self, expression: exp.BitmapBucketNumber) -> str:
2227        """
2228        Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.
2229
2230        Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
2231        - Each bucket covers 32,768 values
2232        - Bucket numbering starts at 1
2233        - Formula: ((value - 1) // 32768) + 1 for positive values
2234
2235        For non-positive values (0 and negative), we use value // 32768 to avoid
2236        producing bucket 0 or positive bucket IDs for negative inputs.
2237        """
2238        value = expression.this
2239
2240        positive_formula = ((value - 1) // 32768) + 1
2241        non_positive_formula = value // 32768
2242
2243        # CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
2244        case_expr = (
2245            exp.case()
2246            .when(exp.GT(this=value, expression=exp.Literal.number(0)), positive_formula)
2247            .else_(non_positive_formula)
2248        )
2249        return self.sql(case_expr)
2250
2251    def bitmapbitposition_sql(self, expression: exp.BitmapBitPosition) -> str:
2252        """
2253        Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.
2254
2255        Snowflake's BITMAP_BIT_POSITION behavior:
2256        - For n <= 0: returns ABS(n) % 32768
2257        - For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
2258        """
2259        this = expression.this
2260
2261        return self.sql(
2262            exp.Mod(
2263                this=exp.Paren(
2264                    this=exp.If(
2265                        this=exp.GT(this=this, expression=exp.Literal.number(0)),
2266                        true=this - exp.Literal.number(1),
2267                        false=exp.Abs(this=this),
2268                    )
2269                ),
2270                expression=MAX_BIT_POSITION,
2271            )
2272        )
2273
2274    def bitmapconstructagg_sql(self, expression: exp.BitmapConstructAgg) -> str:
2275        """
2276        Transpile Snowflake's BITMAP_CONSTRUCT_AGG to DuckDB equivalent.
2277        Uses a pre-parsed template with placeholders replaced by expression nodes.
2278
2279        Snowflake bitmap format:
2280        - Small (< 5 unique values): 2-byte count (big-endian) + values (little-endian) + padding to 10 bytes
2281        - Large (>= 5 unique values): 10-byte header (0x08 + 9 zeros) + values (little-endian)
2282        """
2283        arg = expression.this
2284        return (
2285            f"({self.sql(exp.replace_placeholders(self.BITMAP_CONSTRUCT_AGG_TEMPLATE, arg=arg))})"
2286        )
2287
2288    def compress_sql(self, expression: exp.Compress) -> str:
2289        self.unsupported("DuckDB does not support the COMPRESS() function")
2290        return self.function_fallback_sql(expression)
2291
2292    def encrypt_sql(self, expression: exp.Encrypt) -> str:
2293        self.unsupported("ENCRYPT is not supported in DuckDB")
2294        return self.function_fallback_sql(expression)
2295
2296    def decrypt_sql(self, expression: exp.Decrypt) -> str:
2297        func_name = "TRY_DECRYPT" if expression.args.get("safe") else "DECRYPT"
2298        self.unsupported(f"{func_name} is not supported in DuckDB")
2299        return self.function_fallback_sql(expression)
2300
2301    def decryptraw_sql(self, expression: exp.DecryptRaw) -> str:
2302        func_name = "TRY_DECRYPT_RAW" if expression.args.get("safe") else "DECRYPT_RAW"
2303        self.unsupported(f"{func_name} is not supported in DuckDB")
2304        return self.function_fallback_sql(expression)
2305
2306    def encryptraw_sql(self, expression: exp.EncryptRaw) -> str:
2307        self.unsupported("ENCRYPT_RAW is not supported in DuckDB")
2308        return self.function_fallback_sql(expression)
2309
2310    def parseurl_sql(self, expression: exp.ParseUrl) -> str:
2311        self.unsupported("PARSE_URL is not supported in DuckDB")
2312        return self.function_fallback_sql(expression)
2313
2314    def parseip_sql(self, expression: exp.ParseIp) -> str:
2315        self.unsupported("PARSE_IP is not supported in DuckDB")
2316        return self.function_fallback_sql(expression)
2317
2318    def decompressstring_sql(self, expression: exp.DecompressString) -> str:
2319        self.unsupported("DECOMPRESS_STRING is not supported in DuckDB")
2320        return self.function_fallback_sql(expression)
2321
2322    def decompressbinary_sql(self, expression: exp.DecompressBinary) -> str:
2323        self.unsupported("DECOMPRESS_BINARY is not supported in DuckDB")
2324        return self.function_fallback_sql(expression)
2325
2326    def jarowinklersimilarity_sql(self, expression: exp.JarowinklerSimilarity) -> str:
2327        this = expression.this
2328        expr = expression.expression
2329
2330        if expression.args.get("case_insensitive"):
2331            this = exp.Upper(this=this)
2332            expr = exp.Upper(this=expr)
2333
2334        result = exp.func("JARO_WINKLER_SIMILARITY", this, expr)
2335
2336        if expression.args.get("integer_scale"):
2337            result = exp.cast(result * 100, "INTEGER")
2338
2339        return self.sql(result)
2340
2341    def nthvalue_sql(self, expression: exp.NthValue) -> str:
2342        from_first = expression.args.get("from_first", True)
2343        if not from_first:
2344            self.unsupported("DuckDB's NTH_VALUE doesn't support starting from the end ")
2345
2346        return self.function_fallback_sql(expression)
2347
2348    def randstr_sql(self, expression: exp.Randstr) -> str:
2349        """
2350        Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
2351        Uses a pre-parsed template with placeholders replaced by expression nodes.
2352
2353        RANDSTR(length, generator) generates a random string of specified length.
2354        - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
2355        - With RANDOM(): Use RANDOM() in the hash for non-deterministic output
2356        - No generator: Use default seed value
2357        """
2358        length = expression.this
2359        generator = expression.args.get("generator")
2360
2361        if generator:
2362            if isinstance(generator, exp.Rand):
2363                # If it's RANDOM(), use its seed if available, otherwise use RANDOM() itself
2364                seed_value = generator.this or generator
2365            else:
2366                # Const/int or other expression - use as seed directly
2367                seed_value = generator
2368        else:
2369            # No generator specified, use default seed (arbitrary but deterministic)
2370            seed_value = exp.Literal.number(RANDSTR_SEED)
2371
2372        replacements = {"seed": seed_value, "length": length}
2373        return f"({self.sql(exp.replace_placeholders(self.RANDSTR_TEMPLATE, **replacements))})"
2374
2375    @unsupported_args("finish")
2376    def reduce_sql(self, expression: exp.Reduce) -> str:
2377        array_arg = expression.this
2378        initial_value = expression.args.get("initial")
2379        merge_lambda = expression.args.get("merge")
2380
2381        if merge_lambda:
2382            merge_lambda.set("colon", True)
2383
2384        return self.func("list_reduce", array_arg, merge_lambda, initial_value)
2385
2386    def zipf_sql(self, expression: exp.Zipf) -> str:
2387        """
2388        Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling.
2389        Uses a pre-parsed template with placeholders replaced by expression nodes.
2390        """
2391        s = expression.this
2392        n = expression.args["elementcount"]
2393        gen = expression.args["gen"]
2394
2395        if not isinstance(gen, exp.Rand):
2396            # (ABS(HASH(seed)) % 1000000) / 1000000.0
2397            random_expr: exp.Expr = exp.Div(
2398                this=exp.Paren(
2399                    this=exp.Mod(
2400                        this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()])),
2401                        expression=exp.Literal.number(1000000),
2402                    )
2403                ),
2404                expression=exp.Literal.number(1000000.0),
2405            )
2406        else:
2407            # Use RANDOM() for non-deterministic output
2408            random_expr = exp.Rand()
2409
2410        replacements = {"s": s, "n": n, "random_expr": random_expr}
2411        return f"({self.sql(exp.replace_placeholders(self.ZIPF_TEMPLATE, **replacements))})"
2412
2413    def tobinary_sql(self, expression: exp.ToBinary) -> str:
2414        """
2415        TO_BINARY and TRY_TO_BINARY transpilation:
2416        - 'HEX': TO_BINARY('48454C50', 'HEX') -> UNHEX('48454C50')
2417        - 'UTF-8': TO_BINARY('TEST', 'UTF-8') -> ENCODE('TEST')
2418        - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') -> FROM_BASE64('SEVMUA==')
2419
2420        For TRY_TO_BINARY (safe=True), wrap with TRY():
2421        - 'HEX': TRY_TO_BINARY('invalid', 'HEX') -> TRY(UNHEX('invalid'))
2422        """
2423        value = expression.this
2424        format_arg = expression.args.get("format")
2425        is_safe = expression.args.get("safe")
2426        is_binary = _is_binary(expression)
2427
2428        if not format_arg and not is_binary:
2429            func_name = "TRY_TO_BINARY" if is_safe else "TO_BINARY"
2430            return self.func(func_name, value)
2431
2432        # Snowflake defaults to HEX encoding when no format is specified
2433        fmt = format_arg.name.upper() if format_arg else "HEX"
2434
2435        if fmt in ("UTF-8", "UTF8"):
2436            # DuckDB ENCODE always uses UTF-8, no charset parameter needed
2437            result = self.func("ENCODE", value)
2438        elif fmt == "BASE64":
2439            result = self.func("FROM_BASE64", value)
2440        elif fmt == "HEX":
2441            result = self.func("UNHEX", value)
2442        else:
2443            if is_safe:
2444                return self.sql(exp.null())
2445            else:
2446                self.unsupported(f"format {fmt} is not supported")
2447                result = self.func("TO_BINARY", value)
2448        return f"TRY({result})" if is_safe else result
2449
2450    def tonumber_sql(self, expression: exp.ToNumber) -> str:
2451        fmt = expression.args.get("format")
2452        precision = expression.args.get("precision")
2453        scale = expression.args.get("scale")
2454
2455        if not fmt and precision and scale:
2456            return self.sql(
2457                exp.cast(
2458                    expression.this, f"DECIMAL({precision.name}, {scale.name})", dialect="duckdb"
2459                )
2460            )
2461
2462        return super().tonumber_sql(expression)
2463
2464    def _greatest_least_sql(self, expression: exp.Greatest | exp.Least) -> str:
2465        """
2466        Handle GREATEST/LEAST functions with dialect-aware NULL behavior.
2467
2468        - If ignore_nulls=False (BigQuery-style): return NULL if any argument is NULL
2469        - If ignore_nulls=True (DuckDB/PostgreSQL-style): ignore NULLs, return greatest/least non-NULL value
2470        """
2471        # Get all arguments
2472        all_args = [expression.this, *expression.expressions]
2473        fallback_sql = self.function_fallback_sql(expression)
2474
2475        if expression.args.get("ignore_nulls"):
2476            # DuckDB/PostgreSQL behavior: use native GREATEST/LEAST (ignores NULLs)
2477            return self.sql(fallback_sql)
2478
2479        # return NULL if any argument is NULL
2480        case_expr = exp.case().when(
2481            exp.or_(*[arg.is_(exp.null()) for arg in all_args], copy=False),
2482            exp.null(),
2483            copy=False,
2484        )
2485        case_expr.set("default", fallback_sql)
2486        return self.sql(case_expr)
2487
2488    def generator_sql(self, expression: exp.Generator) -> str:
2489        # Transpile Snowflake GENERATOR to DuckDB range()
2490        rowcount = expression.args.get("rowcount")
2491        time_limit = expression.args.get("time_limit")
2492
2493        if time_limit:
2494            self.unsupported("GENERATOR TIMELIMIT parameter is not supported in DuckDB")
2495
2496        if not rowcount:
2497            self.unsupported("GENERATOR without ROWCOUNT is not supported in DuckDB")
2498            return self.func("range", exp.Literal.number(0))
2499
2500        return self.func("range", rowcount)
2501
2502    def greatest_sql(self, expression: exp.Greatest) -> str:
2503        return self._greatest_least_sql(expression)
2504
2505    def least_sql(self, expression: exp.Least) -> str:
2506        return self._greatest_least_sql(expression)
2507
2508    def lambda_sql(self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True) -> str:
2509        if expression.args.get("colon"):
2510            prefix = "LAMBDA "
2511            arrow_sep = ":"
2512            wrap = False
2513        else:
2514            prefix = ""
2515
2516        lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
2517        return f"{prefix}{lambda_sql}"
2518
2519    def show_sql(self, expression: exp.Show) -> str:
2520        from_ = self.sql(expression, "from_")
2521        from_ = f" FROM {from_}" if from_ else ""
2522        return f"SHOW {expression.name}{from_}"
2523
2524    def soundex_sql(self, expression: exp.Soundex) -> str:
2525        self.unsupported("SOUNDEX is not supported in DuckDB")
2526        return self.func("SOUNDEX", expression.this)
2527
    def sortarray_sql(self, expression: exp.SortArray) -> str:
        # Map SORT_ARRAY(arr, asc, nulls_first) onto DuckDB's LIST_SORT /
        # ARRAY_REVERSE_SORT, folding literal boolean flags into DuckDB's
        # string arguments ('ASC'/'DESC', 'NULLS FIRST').
        arr = expression.this
        asc = expression.args.get("asc")
        nulls_first = expression.args.get("nulls_first")

        # Neither flag is a boolean literal: pass everything through untouched.
        if not isinstance(asc, exp.Boolean) and not isinstance(nulls_first, exp.Boolean):
            return self.func("LIST_SORT", arr, asc, nulls_first)

        nulls_are_first = nulls_first == exp.true()
        # DuckDB expects null placement as a string argument when needed.
        nulls_first_sql = exp.Literal.string("NULLS FIRST") if nulls_are_first else None

        # Only nulls_first is literal: keep asc dynamic, translate the nulls flag.
        if not isinstance(asc, exp.Boolean):
            return self.func("LIST_SORT", arr, asc, nulls_first_sql)

        descending = asc == exp.false()

        # Ascending with default null placement needs no extra arguments.
        if not descending and not nulls_are_first:
            return self.func("LIST_SORT", arr)
        # Descending with nulls last maps to ARRAY_REVERSE_SORT.
        if not nulls_are_first:
            return self.func("ARRAY_REVERSE_SORT", arr)
        # Any combination requesting NULLS FIRST is spelled out explicitly.
        return self.func(
            "LIST_SORT",
            arr,
            exp.Literal.string("DESC" if descending else "ASC"),
            exp.Literal.string("NULLS FIRST"),
        )
2554
2555    def install_sql(self, expression: exp.Install) -> str:
2556        force = "FORCE " if expression.args.get("force") else ""
2557        this = self.sql(expression, "this")
2558        from_clause = expression.args.get("from_")
2559        from_clause = f" FROM {from_clause}" if from_clause else ""
2560        return f"{force}INSTALL {this}{from_clause}"
2561
2562    def approxtopk_sql(self, expression: exp.ApproxTopK) -> str:
2563        self.unsupported(
2564            "APPROX_TOP_K cannot be transpiled to DuckDB due to incompatible return types. "
2565        )
2566        return self.function_fallback_sql(expression)
2567
2568    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
2569        return self.sql(exp.cast(expression.this, exp.DType.TIMESTAMPTZ))
2570
    def strposition_sql(self, expression: exp.StrPosition) -> str:
        """
        Render STRPOS/POSITION-style lookups for DuckDB.

        Two special cases are handled before delegating to the shared
        dialect-level strposition_sql helper:
        - BLOB inputs: DuckDB's STRPOS doesn't accept BLOBs, so both operands
          are hex-encoded first and the hex offset is mapped back to bytes.
        - clamp_position: non-positive start positions are clamped to 1.
        """
        this = expression.this
        substr = expression.args.get("substr")
        position = expression.args.get("position")

        # For BINARY/BLOB: DuckDB's STRPOS doesn't support BLOB types
        # Convert to HEX strings, use STRPOS, then convert hex position to byte position
        if _is_binary(this):
            # Build expression: STRPOS(HEX(haystack), HEX(needle))
            hex_strpos = exp.StrPosition(
                this=exp.Hex(this=this),
                substr=exp.Hex(this=substr),
            )

            # Each byte is two hex digits: (pos + 1) / 2 converts the hex-string
            # offset back to a 1-based byte offset (and preserves 0 = not found).
            return self.sql(exp.cast((hex_strpos + 1) / 2, exp.DType.INT))

        # For VARCHAR: handle clamp_position
        if expression.args.get("clamp_position") and position:
            # Copy before mutating so the caller's tree is left untouched.
            expression = expression.copy()
            expression.set(
                "position",
                exp.If(
                    this=exp.LTE(this=position, expression=exp.Literal.number(0)),
                    true=exp.Literal.number(1),
                    false=position.copy(),
                ),
            )

        return strposition_sql(self, expression)
2600
2601    def substring_sql(self, expression: exp.Substring) -> str:
2602        if expression.args.get("zero_start"):
2603            start = expression.args.get("start")
2604            length = expression.args.get("length")
2605
2606            if start := expression.args.get("start"):
2607                start = exp.If(this=start.eq(0), true=exp.Literal.number(1), false=start)
2608            if length := expression.args.get("length"):
2609                length = exp.If(this=length < 0, true=exp.Literal.number(0), false=length)
2610
2611            return self.func("SUBSTRING", expression.this, start, length)
2612
2613        return self.function_fallback_sql(expression)
2614
    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        """
        Render STR_TO_TIME via STRPTIME, honoring safe parsing and timezone-
        aware target types (LTZ/TZ variants are cast to TIMESTAMPTZ).
        """
        # Check if target_type requires TIMESTAMPTZ (for LTZ/TZ variants)
        target_type = expression.args.get("target_type")
        needs_tz = target_type and target_type.this in (
            exp.DType.TIMESTAMPLTZ,
            exp.DType.TIMESTAMPTZ,
        )

        if expression.args.get("safe"):
            # Safe mode: TRY_STRPTIME returns NULL on parse failure.
            formatted_time = self.format_time(expression)
            cast_type = exp.DType.TIMESTAMPTZ if needs_tz else exp.DType.TIMESTAMP
            return self.sql(
                exp.cast(self.func("TRY_STRPTIME", expression.this, formatted_time), cast_type)
            )

        # Non-safe path: shared dialect helper renders STRPTIME.
        base_sql = str_to_time_sql(self, expression)
        if needs_tz:
            return self.sql(
                exp.cast(
                    base_sql,
                    exp.DataType(this=exp.DType.TIMESTAMPTZ),
                )
            )
        return base_sql
2639
2640    def strtodate_sql(self, expression: exp.StrToDate) -> str:
2641        formatted_time = self.format_time(expression)
2642        function_name = "STRPTIME" if not expression.args.get("safe") else "TRY_STRPTIME"
2643        return self.sql(
2644            exp.cast(
2645                self.func(function_name, expression.this, formatted_time),
2646                exp.DataType(this=exp.DType.DATE),
2647            )
2648        )
2649
2650    def tsordstotime_sql(self, expression: exp.TsOrDsToTime) -> str:
2651        this = expression.this
2652        time_format = self.format_time(expression)
2653        safe = expression.args.get("safe")
2654        time_type = exp.DataType.from_str("TIME", dialect="duckdb")
2655        cast_expr = exp.TryCast if safe else exp.Cast
2656
2657        if time_format:
2658            func_name = "TRY_STRPTIME" if safe else "STRPTIME"
2659            strptime = exp.Anonymous(this=func_name, expressions=[this, time_format])
2660            return self.sql(cast_expr(this=strptime, to=time_type))
2661
2662        if isinstance(this, exp.TsOrDsToTime) or this.is_type(exp.DType.TIME):
2663            return self.sql(this)
2664
2665        return self.sql(cast_expr(this=this, to=time_type))
2666
2667    def currentdate_sql(self, expression: exp.CurrentDate) -> str:
2668        if not expression.this:
2669            return "CURRENT_DATE"
2670
2671        expr = exp.Cast(
2672            this=exp.AtTimeZone(this=exp.CurrentTimestamp(), zone=expression.this),
2673            to=exp.DataType(this=exp.DType.DATE),
2674        )
2675        return self.sql(expr)
2676
2677    def checkjson_sql(self, expression: exp.CheckJson) -> str:
2678        arg = expression.this
2679        return self.sql(
2680            exp.case()
2681            .when(
2682                exp.or_(arg.is_(exp.Null()), arg.eq(""), exp.func("json_valid", arg)),
2683                exp.null(),
2684            )
2685            .else_(exp.Literal.string("Invalid JSON"))
2686        )
2687
2688    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
2689        arg = expression.this
2690        if expression.args.get("safe"):
2691            return self.sql(
2692                exp.case()
2693                .when(exp.func("json_valid", arg), exp.cast(arg.copy(), "JSON"))
2694                .else_(exp.null())
2695            )
2696        return self.func("JSON", arg)
2697
2698    def unicode_sql(self, expression: exp.Unicode) -> str:
2699        if expression.args.get("empty_is_zero"):
2700            return self.sql(
2701                exp.case()
2702                .when(expression.this.eq(exp.Literal.string("")), exp.Literal.number(0))
2703                .else_(exp.Anonymous(this="UNICODE", expressions=[expression.this]))
2704            )
2705
2706        return self.func("UNICODE", expression.this)
2707
2708    def stripnullvalue_sql(self, expression: exp.StripNullValue) -> str:
2709        return self.sql(
2710            exp.case()
2711            .when(exp.func("json_type", expression.this).eq("NULL"), exp.null())
2712            .else_(expression.this)
2713        )
2714
2715    def trunc_sql(self, expression: exp.Trunc) -> str:
2716        decimals = expression.args.get("decimals")
2717        if (
2718            expression.args.get("fractions_supported")
2719            and decimals
2720            and not decimals.is_type(exp.DType.INT)
2721        ):
2722            decimals = exp.cast(decimals, exp.DType.INT, dialect="duckdb")
2723
2724        return self.func("TRUNC", expression.this, decimals)
2725
    def normal_sql(self, expression: exp.Normal) -> str:
        """
        Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB.

        Uses the Box-Muller transform via NORMAL_TEMPLATE, which needs two
        independent uniform samples (u1, u2).
        """
        mean = expression.this
        stddev = expression.args["stddev"]
        gen: exp.Expr = expression.args["gen"]

        # Build two uniform random values [0, 1) for Box-Muller transform
        if isinstance(gen, exp.Rand) and gen.this is None:
            # Unseeded RANDOM(): both samples stay non-deterministic.
            u1: exp.Expr = exp.Rand()
            u2: exp.Expr = exp.Rand()
        else:
            # Seeded: derive two values using HASH with different inputs
            seed = gen.this if isinstance(gen, exp.Rand) else gen
            u1 = exp.replace_placeholders(self.SEEDED_RANDOM_TEMPLATE, seed=seed)
            # seed.copy() so u1 and u2 don't share the same AST node.
            u2 = exp.replace_placeholders(
                self.SEEDED_RANDOM_TEMPLATE,
                seed=exp.Add(this=seed.copy(), expression=exp.Literal.number(1)),
            )

        replacements = {"mean": mean, "stddev": stddev, "u1": u1, "u2": u2}
        return self.sql(exp.replace_placeholders(self.NORMAL_TEMPLATE, **replacements))
2751
    def uniform_sql(self, expression: exp.Uniform) -> str:
        """
        Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB.

        UNIFORM returns a random value in [min, max]:
        - Integer result if both min and max are integers
        - Float result if either min or max is a float
        """
        min_val = expression.this
        max_val = expression.expression
        gen = expression.args.get("gen")

        # Determine if result should be integer (both bounds are integers).
        # We do this to emulate Snowflake's behavior, INT -> INT, FLOAT -> FLOAT
        is_int_result = min_val.is_int and max_val.is_int

        # Build the random value expression [0, 1)
        if not isinstance(gen, exp.Rand):
            # Seed value: (ABS(HASH(seed)) % 1000000) / 1000000.0
            # NOTE(review): gen is embedded without .copy() here (unlike
            # zipf_sql) and may be None if the arg is absent — assumed the
            # parser always supplies gen; confirm against the parser.
            random_expr: exp.Expr = exp.Div(
                this=exp.Paren(
                    this=exp.Mod(
                        this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen])),
                        expression=exp.Literal.number(1000000),
                    )
                ),
                expression=exp.Literal.number(1000000.0),
            )
        else:
            random_expr = exp.Rand()

        # Build: min + random * (max - min [+ 1 for int])
        range_expr: exp.Expr = exp.Sub(this=max_val, expression=min_val)
        if is_int_result:
            # +1 makes the integer upper bound inclusive after FLOOR below.
            range_expr = exp.Add(this=range_expr, expression=exp.Literal.number(1))

        result: exp.Expr = exp.Add(
            this=min_val,
            expression=exp.Mul(this=random_expr, expression=exp.Paren(this=range_expr)),
        )

        if is_int_result:
            result = exp.Cast(this=exp.Floor(this=result), to=exp.DType.BIGINT.into_expr())

        return self.sql(result)
2797
    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        """
        Render TIME_FROM_PARTS, emulating Snowflake's overflow semantics.

        Without overflow, this maps to DuckDB's MAKE_TIME (folding nanoseconds
        into the seconds argument). With overflow allowed, out-of-range parts
        are converted to total seconds and added to TIME '00:00:00' as an
        interval.
        """
        nano = expression.args.get("nano")
        overflow = expression.args.get("overflow")

        # Snowflake's TIME_FROM_PARTS supports overflow
        if overflow:
            hour = expression.args["hour"]
            minute = expression.args["min"]
            sec = expression.args["sec"]

            # Check if values are within normal ranges - use MAKE_TIME for efficiency
            if not nano and all(arg.is_int for arg in [hour, minute, sec]):
                try:
                    h_val = hour.to_py()
                    m_val = minute.to_py()
                    s_val = sec.to_py()
                    if 0 <= h_val <= 23 and 0 <= m_val <= 59 and 0 <= s_val <= 59:
                        return rename_func("MAKE_TIME")(self, expression)
                except ValueError:
                    # Literal couldn't be converted to a Python value; fall
                    # through to the interval-arithmetic path below.
                    pass

            # Overflow or nanoseconds detected - use INTERVAL arithmetic
            if nano:
                # pop() detaches nano so it isn't rendered twice.
                sec = sec + nano.pop() / exp.Literal.number(1000000000.0)

            total_seconds = hour * exp.Literal.number(3600) + minute * exp.Literal.number(60) + sec

            return self.sql(
                exp.Add(
                    this=exp.Cast(
                        this=exp.Literal.string("00:00:00"), to=exp.DType.TIME.into_expr()
                    ),
                    expression=exp.Interval(this=total_seconds, unit=exp.var("SECOND")),
                )
            )

        # Default: MAKE_TIME
        if nano:
            # Fold detached nanoseconds into the fractional seconds argument.
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)
2841
    def extract_sql(self, expression: exp.Extract) -> str:
        """
        Transpile EXTRACT/DATE_PART for DuckDB, handling specifiers not natively supported.

        DuckDB doesn't support: WEEKISO, YEAROFWEEK, YEAROFWEEKISO, NANOSECOND,
        EPOCH_SECOND (as integer), EPOCH_MILLISECOND, EPOCH_MICROSECOND, EPOCH_NANOSECOND
        """
        this = expression.this
        datetime_expr = expression.expression

        # TIMESTAMPTZ extractions may produce different results between Snowflake and DuckDB
        # because Snowflake applies server timezone while DuckDB uses local timezone
        if datetime_expr.is_type(exp.DType.TIMESTAMPTZ, exp.DType.TIMESTAMPLTZ):
            self.unsupported(
                "EXTRACT from TIMESTAMPTZ / TIMESTAMPLTZ may produce different results due to timezone handling differences"
            )

        part_name = this.name.upper()

        # Specifiers that map to a STRFTIME format + result cast.
        if part_name in self.EXTRACT_STRFTIME_MAPPINGS:
            fmt, cast_type = self.EXTRACT_STRFTIME_MAPPINGS[part_name]

            # Problem: strftime doesn't accept TIME and there's no NANOSECOND function
            # So, for NANOSECOND with TIME, fallback to MICROSECOND * 1000
            is_nano_time = part_name == "NANOSECOND" and datetime_expr.is_type(
                exp.DType.TIME, exp.DType.TIMETZ
            )

            if is_nano_time:
                self.unsupported("Parameter NANOSECOND is not supported with TIME type in DuckDB")
                return self.sql(
                    exp.cast(
                        exp.Mul(
                            this=exp.Extract(this=exp.var("MICROSECOND"), expression=datetime_expr),
                            expression=exp.Literal.number(1000),
                        ),
                        exp.DataType.from_str(cast_type, dialect="duckdb"),
                    )
                )

            # For NANOSECOND, cast to TIMESTAMP_NS to preserve nanosecond precision
            strftime_input = datetime_expr
            if part_name == "NANOSECOND":
                strftime_input = exp.cast(datetime_expr, exp.DType.TIMESTAMP_NS)

            return self.sql(
                exp.cast(
                    exp.Anonymous(
                        this="STRFTIME",
                        expressions=[strftime_input, exp.Literal.string(fmt)],
                    ),
                    exp.DataType.from_str(cast_type, dialect="duckdb"),
                )
            )

        # Epoch-family specifiers that map to a dedicated DuckDB function.
        if part_name in self.EXTRACT_EPOCH_MAPPINGS:
            func_name = self.EXTRACT_EPOCH_MAPPINGS[part_name]
            result: exp.Expr = exp.Anonymous(this=func_name, expressions=[datetime_expr])
            # EPOCH returns float, cast to BIGINT for integer result
            if part_name == "EPOCH_SECOND":
                result = exp.cast(result, exp.DataType.from_str("BIGINT", dialect="duckdb"))
            return self.sql(result)

        # Everything else is natively supported by DuckDB's EXTRACT.
        return super().extract_sql(expression)
2906
    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        """
        Render TIMESTAMP_FROM_PARTS in either of its two forms:
        - (date_expr, time_expr): rendered as DATE + TIME
        - (year, month, day, hour, minute, second, ...): MAKE_TIMESTAMP with
          milli/nano folded into the fractional seconds argument.
        """
        # Check if this is the date/time expression form: TIMESTAMP_FROM_PARTS(date_expr, time_expr)
        date_expr = expression.this
        time_expr = expression.expression

        if date_expr is not None and time_expr is not None:
            # In DuckDB, DATE + TIME produces TIMESTAMP
            return self.sql(exp.Add(this=date_expr, expression=time_expr))

        # Component-based form: TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second, ...)
        sec = expression.args.get("sec")
        if sec is None:
            # This shouldn't happen with valid input, but handle gracefully
            return rename_func("MAKE_TIMESTAMP")(self, expression)

        # pop() detaches milli/nano so they aren't rendered as extra arguments.
        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        # Only rewrite sec when a sub-second component was actually folded in.
        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)
2934
2935    @unsupported_args("nano")
2936    def timestampltzfromparts_sql(self, expression: exp.TimestampLtzFromParts) -> str:
2937        # Pop nano so rename_func only passes args that MAKE_TIMESTAMP accepts
2938        if nano := expression.args.get("nano"):
2939            nano.pop()
2940
2941        timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
2942        return f"CAST({timestamp} AS TIMESTAMPTZ)"
2943
2944    @unsupported_args("nano")
2945    def timestamptzfromparts_sql(self, expression: exp.TimestampTzFromParts) -> str:
2946        # Extract zone before popping
2947        zone = expression.args.get("zone")
2948        # Pop zone and nano so rename_func only passes args that MAKE_TIMESTAMP accepts
2949        if zone:
2950            zone = zone.pop()
2951
2952        if nano := expression.args.get("nano"):
2953            nano.pop()
2954
2955        timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
2956
2957        if zone:
2958            # Use AT TIME ZONE to apply the explicit timezone
2959            return f"{timestamp} AT TIME ZONE {self.sql(zone)}"
2960
2961        return timestamp
2962
    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: str | None = None,
    ) -> str:
        """
        Render TABLESAMPLE/USING SAMPLE, forcing reservoir sampling when a
        discrete row count is requested with an incompatible method.
        """
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        # A fixed row count only works reliably with reservoir sampling in DuckDB.
        if expression.args.get("size"):
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
2982
2983    def join_sql(self, expression: exp.Join) -> str:
2984        if (
2985            not expression.args.get("using")
2986            and not expression.args.get("on")
2987            and not expression.method
2988            and (expression.kind in ("", "INNER", "OUTER"))
2989        ):
2990            # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause
2991            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
2992            if isinstance(expression.this, exp.Unnest):
2993                return super().join_sql(expression.on(exp.true()))
2994
2995            expression.set("side", None)
2996            expression.set("kind", None)
2997
2998        return super().join_sql(expression)
2999
3000    def countif_sql(self, expression: exp.CountIf) -> str:
3001        if self.dialect.version >= (1, 2):
3002            return self.function_fallback_sql(expression)
3003
3004        # https://github.com/tobymao/sqlglot/pull/4749
3005        return count_if_to_sum(self, expression)
3006
    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Generate bracket (subscript) syntax, emulating pre-1.2 DuckDB semantics."""
        if self.dialect.version >= (1, 2):
            return super().bracket_sql(expression)

        # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
        this = expression.this
        if isinstance(this, exp.Array):
            # Array literals must be parenthesized before they can be subscripted
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                # Local import; annotate types on demand to detect MAP subscripts
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(exp.DType.MAP):
                # Pre-1.2, map[key] yields a list; unwrap its single element
                bracket = f"({bracket})[1]"

        return bracket
3028
    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        """Generate WITHIN GROUP, rewriting ARRAY_AGG and ordered-set aggregates.

        DuckDB has no WITHIN GROUP clause for ARRAY_AGG, so the ORDER BY is moved
        inside the call; for PERCENTILE-style aggregates the order key becomes the
        first argument per DuckDB's ordered-set aggregate syntax.
        """
        func = expression.this

        # For ARRAY_AGG, DuckDB requires ORDER BY inside the function, not in WITHIN GROUP
        # Transform: ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) -> ARRAY_AGG(x ORDER BY y)
        if isinstance(func, exp.ArrayAgg):
            if not isinstance(order := expression.expression, exp.Order):
                return self.sql(func)

            # Save the original column for FILTER clause (before wrapping with Order)
            original_this = func.this

            # Move ORDER BY inside ARRAY_AGG by wrapping its argument with Order
            # ArrayAgg.this should become Order(this=ArrayAgg.this, expressions=order.expressions)
            func.set(
                "this",
                exp.Order(
                    this=func.this.copy(),
                    expressions=order.expressions,
                ),
            )

            # Generate the ARRAY_AGG function with ORDER BY and add FILTER clause if needed
            # Use original_this (not the Order-wrapped version) for the FILTER condition
            array_agg_sql = self.function_fallback_sql(func)
            return self._add_arrayagg_null_filter(array_agg_sql, func, original_this)

        # For other functions (like PERCENTILES), use existing logic
        expression_sql = self.sql(expression, "expression")

        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        # Splice the WITHIN GROUP expression inside the function's trailing paren
        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"
3070
    def length_sql(self, expression: exp.Length) -> str:
        """Generate LENGTH, resolving BLOB vs text inputs at generation or query time.

        If the argument may be binary and its type can't be determined statically,
        a CASE over TYPEOF picks OCTET_LENGTH (bytes) or LENGTH (chars) at runtime.
        """
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            # Local import; annotate types on demand to classify the argument
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DType.VARBINARY)
        varchar = exp.cast(arg, exp.DType.VARCHAR)

        case = (
            exp.case(exp.Anonymous(this="TYPEOF", expressions=[arg]))
            .when(exp.Literal.string("BLOB"), exp.ByteLength(this=blob))
            .else_(exp.Anonymous(this="LENGTH", expressions=[varchar]))
        )
        return self.sql(case)
3097
3098    def bitlength_sql(self, expression: exp.BitLength) -> str:
3099        if not _is_binary(arg := expression.this):
3100            return self.func("BIT_LENGTH", arg)
3101
3102        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
3103        return self.sql(exp.ByteLength(this=blob) * exp.Literal.number(8))
3104
3105    def chr_sql(self, expression: exp.Chr, name: str = "CHR") -> str:
3106        arg = expression.expressions[0]
3107        if arg.is_type(*exp.DataType.REAL_TYPES):
3108            arg = exp.cast(arg, exp.DType.INT)
3109        return self.func("CHR", arg)
3110
3111    def collation_sql(self, expression: exp.Collation) -> str:
3112        self.unsupported("COLLATION function is not supported by DuckDB")
3113        return self.function_fallback_sql(expression)
3114
    def collate_sql(self, expression: exp.Collate) -> str:
        """Translate a Snowflake-style collation specifier into DuckDB's dotted form.

        Non-string specifiers pass through unchanged. Each '-'-separated part that
        matches a DuckDB default is dropped; parts with no DuckDB equivalent emit a
        warning but are still kept. If nothing remains, the COLLATE is dropped.
        """
        if not expression.expression.is_string:
            return super().collate_sql(expression)

        raw = expression.expression.name
        if not raw:
            # Empty specifier: emit the collated expression without COLLATE
            return self.sql(expression.this)

        parts = []
        for part in raw.split("-"):
            lower = part.lower()
            if lower not in _SNOWFLAKE_COLLATION_DEFAULTS:
                if lower in _SNOWFLAKE_COLLATION_UNSUPPORTED:
                    self.unsupported(
                        f"Snowflake collation specifier '{part}' has no DuckDB equivalent"
                    )
                parts.append(lower)

        if not parts:
            # Every part was a default, so the COLLATE clause is redundant
            return self.sql(expression.this)
        return super().collate_sql(
            exp.Collate(this=expression.this, expression=exp.var(".".join(parts)))
        )
3138
3139    def _validate_regexp_flags(self, flags: exp.Expr | None, supported_flags: str) -> str | None:
3140        """
3141        Validate and filter regexp flags for DuckDB compatibility.
3142
3143        Args:
3144            flags: The flags expression to validate
3145            supported_flags: String of supported flags (e.g., "ims", "cims").
3146                            Only these flags will be returned.
3147
3148        Returns:
3149            Validated/filtered flag string, or None if no valid flags remain
3150        """
3151        if not isinstance(flags, exp.Expr):
3152            return None
3153
3154        if not flags.is_string:
3155            self.unsupported("Non-literal regexp flags are not fully supported in DuckDB")
3156            return None
3157
3158        flag_str = flags.this
3159        unsupported = set(flag_str) - set(supported_flags)
3160
3161        if unsupported:
3162            self.unsupported(
3163                f"Regexp flags {sorted(unsupported)} are not supported in this context"
3164            )
3165
3166        flag_str = "".join(f for f in flag_str if f in supported_flags)
3167        return flag_str if flag_str else None
3168
    def regexpcount_sql(self, expression: exp.RegexpCount) -> str:
        """Emulate REGEXP_COUNT as LENGTH(REGEXP_EXTRACT_ALL(...)).

        Flags are validated and embedded into the pattern as an inline group, an
        optional start position is applied with SUBSTRING, and an empty pattern
        returns 0 (Snowflake semantics) instead of matching between characters.
        """
        this = expression.this
        pattern = expression.expression
        position = expression.args.get("position")
        parameters = expression.args.get("parameters")

        # Validate flags - only "ims" flags are supported for embedded patterns
        validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims")

        if position:
            # Start counting from `position` by trimming the subject first
            this = exp.Substring(this=this, start=position)

        # Embed flags in pattern (REGEXP_EXTRACT_ALL doesn't support flags argument)
        if validated_flags:
            pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern])

        # Handle empty pattern: Snowflake returns 0, DuckDB would match between every character
        result = (
            exp.case()
            .when(
                exp.EQ(this=pattern, expression=exp.Literal.string("")),
                exp.Literal.number(0),
            )
            .else_(
                exp.Length(
                    this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern])
                )
            )
        )

        return self.sql(result)
3200
    def regexpreplace_sql(self, expression: exp.RegexpReplace) -> str:
        """Generate REGEXP_REPLACE, emulating occurrence/position/flag semantics.

        Only literal occurrence/position values are supported: occurrence 0 maps to
        a global replace (the 'g' flag), occurrence 1 to a single replace, and a
        position > 1 is emulated by splitting the subject with SUBSTRING and
        re-concatenating the untouched prefix.
        """
        subject = expression.this
        pattern = expression.expression
        replacement = expression.args.get("replacement") or exp.Literal.string("")
        position = expression.args.get("position")
        occurrence = expression.args.get("occurrence")
        modifiers = expression.args.get("modifiers")

        validated_flags = self._validate_regexp_flags(modifiers, supported_flags="cimsg") or ""

        # Handle occurrence (only literals supported)
        if occurrence and not occurrence.is_int:
            self.unsupported("REGEXP_REPLACE with non-literal occurrence")
        else:
            occurrence = occurrence.to_py() if occurrence and occurrence.is_int else 0
            if occurrence > 1:
                self.unsupported(f"REGEXP_REPLACE occurrence={occurrence} not supported")
            # flag duckdb to do either all or none, single_replace check is for duckdb round trip
            elif (
                occurrence == 0
                and "g" not in validated_flags
                and not expression.args.get("single_replace")
            ):
                validated_flags += "g"

        # Handle position (only literals supported)
        prefix = None
        if position and not position.is_int:
            self.unsupported("REGEXP_REPLACE with non-literal position")
        elif position and position.is_int and position.to_py() > 1:
            # Split the subject: the prefix is left untouched, the rest is replaced
            pos = position.to_py()
            prefix = exp.Substring(
                this=subject, start=exp.Literal.number(1), length=exp.Literal.number(pos - 1)
            )
            subject = exp.Substring(this=subject, start=exp.Literal.number(pos))

        result: exp.Expr = exp.Anonymous(
            this="REGEXP_REPLACE",
            expressions=[
                subject,
                pattern,
                replacement,
                exp.Literal.string(validated_flags) if validated_flags else None,
            ],
        )

        if prefix:
            result = exp.Concat(expressions=[prefix, result])

        return self.sql(result)
3251
3252    def regexplike_sql(self, expression: exp.RegexpLike) -> str:
3253        this = expression.this
3254        pattern = expression.expression
3255        flag = expression.args.get("flag")
3256
3257        if expression.args.get("full_match"):
3258            validated_flags = self._validate_regexp_flags(flag, supported_flags="cims")
3259            flag = exp.Literal.string(validated_flags) if validated_flags else None
3260            return self.func("REGEXP_FULL_MATCH", this, pattern, flag)
3261
3262        return self.func("REGEXP_MATCHES", this, pattern, flag)
3263
3264    @unsupported_args("ins_cost", "del_cost", "sub_cost")
3265    def levenshtein_sql(self, expression: exp.Levenshtein) -> str:
3266        this = expression.this
3267        expr = expression.expression
3268        max_dist = expression.args.get("max_dist")
3269
3270        if max_dist is None:
3271            return self.func("LEVENSHTEIN", this, expr)
3272
3273        # Emulate Snowflake semantics: if distance > max_dist, return max_dist
3274        levenshtein = exp.Levenshtein(this=this, expression=expr)
3275        return self.sql(exp.Least(this=levenshtein, expressions=[max_dist]))
3276
    def pad_sql(self, expression: exp.Pad) -> str:
        """
        Handle RPAD/LPAD for VARCHAR and BINARY types.

        For VARCHAR: Delegate to parent class
        For BINARY: Lower to: input || REPEAT(pad, GREATEST(0, target_len - OCTET_LENGTH(input)))
        """
        string_arg = expression.this
        fill_arg = expression.args.get("fill_pattern") or exp.Literal.string(" ")

        if _is_binary(string_arg) or _is_binary(fill_arg):
            length_arg = expression.expression
            is_left = expression.args.get("is_left")

            # Bytes still needed: target length minus the input's byte length,
            # clamped at 0 so over-long inputs aren't padded with a negative count
            input_len = exp.ByteLength(this=string_arg)
            chars_needed = length_arg - input_len
            pad_count = exp.Greatest(
                this=exp.Literal.number(0), expressions=[chars_needed], ignore_nulls=True
            )
            repeat_expr = exp.Repeat(this=fill_arg, times=pad_count)

            # LPAD puts the padding before the input, RPAD after
            left, right = string_arg, repeat_expr
            if is_left:
                left, right = right, left

            result = exp.DPipe(this=left, expression=right)
            return self.sql(result)

        # For VARCHAR: Delegate to parent class (handles PAD_FILL_PATTERN_IS_REQUIRED)
        return super().pad_sql(expression)
3307
3308    def minhash_sql(self, expression: exp.Minhash) -> str:
3309        k = expression.this
3310        exprs = expression.expressions
3311
3312        if len(exprs) != 1 or isinstance(exprs[0], exp.Star):
3313            self.unsupported(
3314                "MINHASH with multiple expressions or * requires manual query restructuring"
3315            )
3316            return self.func("MINHASH", k, *exprs)
3317
3318        expr = exprs[0]
3319        result = exp.replace_placeholders(self.MINHASH_TEMPLATE.copy(), expr=expr, k=k)
3320        return f"({self.sql(result)})"
3321
3322    def minhashcombine_sql(self, expression: exp.MinhashCombine) -> str:
3323        expr = expression.this
3324        result = exp.replace_placeholders(self.MINHASH_COMBINE_TEMPLATE.copy(), expr=expr)
3325        return f"({self.sql(result)})"
3326
3327    def approximatesimilarity_sql(self, expression: exp.ApproximateSimilarity) -> str:
3328        expr = expression.this
3329        result = exp.replace_placeholders(self.APPROXIMATE_SIMILARITY_TEMPLATE.copy(), expr=expr)
3330        return f"({self.sql(result)})"
3331
3332    def arrayuniqueagg_sql(self, expression: exp.ArrayUniqueAgg) -> str:
3333        return self.sql(
3334            exp.Filter(
3335                this=exp.func("LIST", exp.Distinct(expressions=[expression.this])),
3336                expression=exp.Where(this=expression.this.copy().is_(exp.null()).not_()),
3337            )
3338        )
3339
3340    def arrayunionagg_sql(self, expression: exp.ArrayUnionAgg) -> str:
3341        self.unsupported("ARRAY_UNION_AGG is not supported in DuckDB")
3342        return self.function_fallback_sql(expression)
3343
    def arraydistinct_sql(self, expression: exp.ArrayDistinct) -> str:
        """Generate LIST_DISTINCT, re-adding a single NULL when `check_null` is set.

        NULL presence is detected by comparing the array's size with LIST_COUNT;
        when they differ, the NULL-compacted array is deduped and one NULL is
        appended back.
        """
        arr = expression.this
        func = self.func("LIST_DISTINCT", arr)

        if expression.args.get("check_null"):
            # Dedupe the NULL-compacted array, then append a single NULL element
            add_null_to_array = exp.func(
                "LIST_APPEND", exp.func("LIST_DISTINCT", exp.ArrayCompact(this=arr)), exp.Null()
            )
            return self.sql(
                exp.If(
                    this=exp.NEQ(
                        this=exp.ArraySize(this=arr), expression=exp.func("LIST_COUNT", arr)
                    ),
                    true=add_null_to_array,
                    false=func,
                )
            )

        return func
3363
3364    def arrayintersect_sql(self, expression: exp.ArrayIntersect) -> str:
3365        if expression.args.get("is_multiset") and len(expression.expressions) == 2:
3366            return self._array_bag_sql(
3367                self.ARRAY_INTERSECTION_CONDITION,
3368                expression.expressions[0],
3369                expression.expressions[1],
3370            )
3371        return self.function_fallback_sql(expression)
3372
3373    def arrayexcept_sql(self, expression: exp.ArrayExcept) -> str:
3374        arr1, arr2 = expression.this, expression.expression
3375        if expression.args.get("is_multiset"):
3376            return self._array_bag_sql(self.ARRAY_EXCEPT_CONDITION, arr1, arr2)
3377        return self.sql(
3378            exp.replace_placeholders(self.ARRAY_EXCEPT_SET_TEMPLATE, arr1=arr1, arr2=arr2)
3379        )
3380
    def arrayslice_sql(self, expression: exp.ArraySlice) -> str:
        """
        Transpiles Snowflake's ARRAY_SLICE (0-indexed, exclusive end) to DuckDB's
        ARRAY_SLICE (1-indexed, inclusive end) by wrapping start and end in CASE
        expressions that adjust the index at query time:
          - start: CASE WHEN start >= 0 THEN start + 1 ELSE start END
          - end:   CASE WHEN end < 0 THEN end - 1 ELSE end END
        """
        start, end = expression.args.get("start"), expression.args.get("end")

        if expression.args.get("zero_based"):
            if start is not None:
                # Copies are used in the WHEN branches; the original node lands in ELSE
                start = (
                    exp.case()
                    .when(
                        exp.GTE(this=start.copy(), expression=exp.Literal.number(0)),
                        exp.Add(this=start.copy(), expression=exp.Literal.number(1)),
                    )
                    .else_(start)
                )
            if end is not None:
                end = (
                    exp.case()
                    .when(
                        exp.LT(this=end.copy(), expression=exp.Literal.number(0)),
                        exp.Sub(this=end.copy(), expression=exp.Literal.number(1)),
                    )
                    .else_(end)
                )

        return self.func("ARRAY_SLICE", expression.this, start, end, expression.args.get("step"))
3412
    def arrayszip_sql(self, expression: exp.ArraysZip) -> str:
        """Emulate ARRAYS_ZIP via a query template that zips lists into structs.

        The generated expression pads shorter arrays up to the longest one, yields
        an empty-shaped struct when every input is empty, and guards against NULL
        arrays via COALESCE.
        """
        args = expression.expressions

        if not args:
            # Return [{}] - using MAP([], []) since DuckDB can't represent empty structs
            return self.sql(exp.array(exp.Map(keys=exp.array(), values=exp.array())))

        # Build placeholder values for template
        lengths = [exp.Length(this=arg) for arg in args]
        max_len = (
            lengths[0]
            if len(lengths) == 1
            else exp.Greatest(this=lengths[0], expressions=lengths[1:])
        )

        # Empty struct with same schema: {'$1': NULL, '$2': NULL, ...}
        empty_struct = exp.func(
            "STRUCT",
            *[
                exp.PropertyEQ(this=exp.Literal.string(f"${i + 1}"), expression=exp.Null())
                for i in range(len(args))
            ],
        )

        # Struct for transform: {'$1': COALESCE(arr1, [])[__i + 1], ...}
        # COALESCE wrapping handles NULL arrays - prevents invalid NULL[i] syntax
        index = exp.column("__i") + 1
        transform_struct = exp.func(
            "STRUCT",
            *[
                exp.PropertyEQ(
                    this=exp.Literal.string(f"${i + 1}"),
                    expression=exp.func("COALESCE", arg, exp.array())[index],
                )
                for i, arg in enumerate(args)
            ],
        )

        result = exp.replace_placeholders(
            self.ARRAYS_ZIP_TEMPLATE.copy(),
            null_check=exp.or_(*[arg.is_(exp.Null()) for arg in args]),
            all_empty_check=exp.and_(
                *[
                    exp.EQ(this=exp.Length(this=arg), expression=exp.Literal.number(0))
                    for arg in args
                ]
            ),
            empty_struct=empty_struct,
            max_len=max_len,
            transform_struct=transform_struct,
        )
        return self.sql(result)
3465
3466    def lower_sql(self, expression: exp.Lower) -> str:
3467        result_sql = self.func("LOWER", _cast_to_varchar(expression.this))
3468        return _gen_with_cast_to_blob(self, expression, result_sql)
3469
3470    def upper_sql(self, expression: exp.Upper) -> str:
3471        result_sql = self.func("UPPER", _cast_to_varchar(expression.this))
3472        return _gen_with_cast_to_blob(self, expression, result_sql)
3473
3474    def reverse_sql(self, expression: exp.Reverse) -> str:
3475        result_sql = self.func("REVERSE", _cast_to_varchar(expression.this))
3476        return _gen_with_cast_to_blob(self, expression, result_sql)
3477
    def _left_right_sql(self, expression: exp.Left | exp.Right, func_name: str) -> str:
        """Shared LEFT/RIGHT generation, emulating binary input via hex round-trip.

        For BLOBs the slice is taken on the HEX string (1 byte = 2 hex chars) and
        UNHEX'd back; with `negative_length_returns_empty`, a negative length
        yields an empty value instead of DuckDB's native behavior.
        """
        arg = expression.this
        length = expression.expression
        is_binary = _is_binary(arg)

        if is_binary:
            # LEFT/RIGHT(blob, n) becomes UNHEX(LEFT/RIGHT(HEX(blob), n * 2))
            # Each byte becomes 2 hex chars, so multiply length by 2
            hex_arg = exp.Hex(this=arg)
            hex_length = exp.Mul(this=length, expression=exp.Literal.number(2))
            result: exp.Expression = exp.Unhex(
                this=exp.Anonymous(this=func_name, expressions=[hex_arg, hex_length])
            )
        else:
            result = exp.Anonymous(this=func_name, expressions=[arg, length])

        if expression.args.get("negative_length_returns_empty"):
            # A negative length maps to '' (or an empty BLOB via UNHEX(''))
            empty: exp.Expression = exp.Literal.string("")
            if is_binary:
                empty = exp.Unhex(this=empty)
            result = exp.case().when(length < exp.Literal.number(0), empty).else_(result)

        return self.sql(result)
3501
3502    def left_sql(self, expression: exp.Left) -> str:
3503        return self._left_right_sql(expression, "LEFT")
3504
3505    def right_sql(self, expression: exp.Right) -> str:
3506        return self._left_right_sql(expression, "RIGHT")
3507
3508    def rtrimmedlength_sql(self, expression: exp.RtrimmedLength) -> str:
3509        return self.func("LENGTH", exp.Trim(this=expression.this, position="TRAILING"))
3510
    def stuff_sql(self, expression: exp.Stuff) -> str:
        """Emulate STUFF/INSERT as SUBSTRING(prefix) || insertion || SUBSTRING(suffix).

        BLOB inputs are handled by slicing the HEX string (1 byte = 2 hex chars)
        and UNHEX'ing the concatenated result back to a BLOB.
        """
        base = expression.this
        start = expression.args["start"]
        length = expression.args["length"]
        insertion = expression.expression
        is_binary = _is_binary(base)

        if is_binary:
            # DuckDB's SUBSTRING doesn't accept BLOB; operate on the HEX string instead
            # (each byte = 2 hex chars), then UNHEX back to BLOB
            base = exp.Hex(this=base)
            insertion = exp.Hex(this=insertion)
            left = exp.Substring(
                this=base.copy(),
                start=exp.Literal.number(1),
                length=(start.copy() - exp.Literal.number(1)) * exp.Literal.number(2),
            )
            right = exp.Substring(
                this=base.copy(),
                start=((start + length) - exp.Literal.number(1)) * exp.Literal.number(2)
                + exp.Literal.number(1),
            )
        else:
            left = exp.Substring(
                this=base.copy(),
                start=exp.Literal.number(1),
                length=start.copy() - exp.Literal.number(1),
            )
            right = exp.Substring(this=base.copy(), start=start + length)
        # prefix || insertion || suffix
        result: exp.Expr = exp.DPipe(
            this=exp.DPipe(this=left, expression=insertion), expression=right
        )

        if is_binary:
            result = exp.Unhex(this=result)

        return self.sql(result)
3548
3549    def rand_sql(self, expression: exp.Rand) -> str:
3550        seed = expression.this
3551        if seed is not None:
3552            self.unsupported("RANDOM with seed is not supported in DuckDB")
3553
3554        lower = expression.args.get("lower")
3555        upper = expression.args.get("upper")
3556
3557        if lower and upper:
3558            # scale DuckDB's [0,1) to the specified range
3559            range_size = exp.paren(upper - lower)
3560            scaled = exp.Add(this=lower, expression=exp.func("random") * range_size)
3561
3562            # For now we assume that if bounds are set, return type is BIGINT. Snowflake/Teradata
3563            result = exp.cast(scaled, exp.DType.BIGINT)
3564            return self.sql(result)
3565
3566        # Default DuckDB behavior - just return RANDOM() as float
3567        return "RANDOM()"
3568
3569    def bytelength_sql(self, expression: exp.ByteLength) -> str:
3570        arg = expression.this
3571
3572        # Check if it's a text type (handles both literals and annotated expressions)
3573        if arg.is_type(*exp.DataType.TEXT_TYPES):
3574            return self.func("OCTET_LENGTH", exp.Encode(this=arg))
3575
3576        # Default: pass through as-is (conservative for DuckDB, handles binary and unannotated)
3577        return self.func("OCTET_LENGTH", arg)
3578
    def base64encode_sql(self, expression: exp.Base64Encode) -> str:
        """Generate TO_BASE64, emulating custom alphabets and line wrapping.

        Text inputs are ENCODE'd to BLOB first (TO_BASE64 needs binary input); a
        literal max_line_length is emulated with REGEXP_REPLACE inserting newlines
        every N characters, trimming a trailing newline.
        """
        # DuckDB TO_BASE64 requires BLOB input
        # Snowflake BASE64_ENCODE accepts both VARCHAR and BINARY - for VARCHAR it implicitly
        # encodes UTF-8 bytes. We add ENCODE unless the input is a binary type.
        result = expression.this

        # Check if input is a string type - ENCODE only accepts VARCHAR
        if result.is_type(*exp.DataType.TEXT_TYPES):
            result = exp.Encode(this=result)

        result = exp.ToBase64(this=result)

        max_line_length = expression.args.get("max_line_length")
        alphabet = expression.args.get("alphabet")

        # Handle custom alphabet by replacing standard chars with custom ones
        result = _apply_base64_alphabet_replacements(result, alphabet)

        # Handle max_line_length by inserting newlines every N characters
        line_length = (
            t.cast(int, max_line_length.to_py())
            if isinstance(max_line_length, exp.Literal) and max_line_length.is_number
            else 0
        )
        if line_length > 0:
            newline = exp.Chr(expressions=[exp.Literal.number(10)])
            result = exp.Trim(
                this=exp.RegexpReplace(
                    this=result,
                    expression=exp.Literal.string(f"(.{{{line_length}}})"),
                    replacement=exp.Concat(expressions=[exp.Literal.string("\\1"), newline.copy()]),
                ),
                expression=newline,
                position="TRAILING",
            )

        return self.sql(result)
3616
3617    def hex_sql(self, expression: exp.Hex) -> str:
3618        case = expression.args.get("case")
3619
3620        if not case:
3621            return self.func("HEX", expression.this)
3622
3623        hex_expr = exp.Hex(this=expression.this)
3624        return self.sql(
3625            exp.case()
3626            .when(case.is_(exp.null()), exp.null())
3627            .when(case.copy().eq(0), exp.Lower(this=hex_expr.copy()))
3628            .else_(hex_expr)
3629        )
3630
3631    def replace_sql(self, expression: exp.Replace) -> str:
3632        result_sql = self.func(
3633            "REPLACE",
3634            _cast_to_varchar(expression.this),
3635            _cast_to_varchar(expression.expression),
3636            _cast_to_varchar(expression.args.get("replacement")),
3637        )
3638        return _gen_with_cast_to_blob(self, expression, result_sql)
3639
3640    def _bitwise_op(self, expression: exp.Binary, op: str) -> str:
3641        _prepare_binary_bitwise_args(expression)
3642        result_sql = self.binary(expression, op)
3643        return _gen_with_cast_to_blob(self, expression, result_sql)
3644
3645    def bitwisexor_sql(self, expression: exp.BitwiseXor) -> str:
3646        _prepare_binary_bitwise_args(expression)
3647        result_sql = self.func("XOR", expression.this, expression.expression)
3648        return _gen_with_cast_to_blob(self, expression, result_sql)
3649
    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
        """Generate STRUCT_INSERT (or STRUCT_PACK for an empty source struct).

        The key is emitted via `key.name` (bare identifier form) so it can be used
        as a `key := value` named argument.
        """
        this = expression.this
        key = expression.args.get("key")
        key_sql = key.name if isinstance(key, exp.Expr) else ""
        value_sql = self.sql(expression, "value")

        kv_sql = f"{key_sql} := {value_sql}"

        # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
        # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
        if isinstance(this, exp.Struct) and not this.expressions:
            return self.func("STRUCT_PACK", kv_sql)

        return self.func("STRUCT_INSERT", this, kv_sql)
3664
3665    def mapcat_sql(self, expression: exp.MapCat) -> str:
3666        result = exp.replace_placeholders(
3667            self.MAPCAT_TEMPLATE.copy(),
3668            map1=expression.this,
3669            map2=expression.expression,
3670        )
3671        return self.sql(result)
3672
    def mapcontainskey_sql(self, expression: exp.MapContainsKey) -> str:
        """Emulate MAP_CONTAINS_KEY as ARRAY_CONTAINS(MAP_KEYS(...), ...).

        NOTE(review): as written, args["key"] is passed to MAP_KEYS (i.e. treated
        as the map operand) while `expression.this` is the searched key — verify
        these argument roles against the parser that builds MapContainsKey.
        """
        return self.func(
            "ARRAY_CONTAINS", exp.func("MAP_KEYS", expression.args["key"]), expression.this
        )
3677
3678    def mapdelete_sql(self, expression: exp.MapDelete) -> str:
3679        map_arg = expression.this
3680        keys_to_delete = expression.expressions
3681
3682        x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key"))
3683
3684        lambda_expr = exp.Lambda(
3685            this=exp.In(this=x_dot_key, expressions=keys_to_delete).not_(),
3686            expressions=[exp.to_identifier("x")],
3687        )
3688        result = exp.func(
3689            "MAP_FROM_ENTRIES",
3690            exp.ArrayFilter(this=exp.func("MAP_ENTRIES", map_arg), expression=lambda_expr),
3691        )
3692        return self.sql(result)
3693
3694    def mappick_sql(self, expression: exp.MapPick) -> str:
3695        map_arg = expression.this
3696        keys_to_pick = expression.expressions
3697
3698        x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key"))
3699
3700        if len(keys_to_pick) == 1 and keys_to_pick[0].is_type(exp.DType.ARRAY):
3701            lambda_expr = exp.Lambda(
3702                this=exp.func("ARRAY_CONTAINS", keys_to_pick[0], x_dot_key),
3703                expressions=[exp.to_identifier("x")],
3704            )
3705        else:
3706            lambda_expr = exp.Lambda(
3707                this=exp.In(this=x_dot_key, expressions=keys_to_pick),
3708                expressions=[exp.to_identifier("x")],
3709            )
3710
3711        result = exp.func(
3712            "MAP_FROM_ENTRIES",
3713            exp.func("LIST_FILTER", exp.func("MAP_ENTRIES", map_arg), lambda_expr),
3714        )
3715        return self.sql(result)
3716
3717    def mapsize_sql(self, expression: exp.MapSize) -> str:
3718        return self.func("CARDINALITY", expression.this)
3719
3720    @unsupported_args("update_flag")
3721    def mapinsert_sql(self, expression: exp.MapInsert) -> str:
3722        map_arg = expression.this
3723        key = expression.args.get("key")
3724        value = expression.args.get("value")
3725
3726        map_type = map_arg.type
3727
3728        if value is not None:
3729            if map_type and map_type.expressions and len(map_type.expressions) > 1:
3730                # Extract the value type from MAP(key_type, value_type)
3731                value_type = map_type.expressions[1]
3732                # Cast value to match the map's value type to avoid type conflicts
3733                value = exp.cast(value, value_type)
3734            # else: polymorphic MAP case - no type parameters available, use value as-is
3735
3736        # Create a single-entry map for the new key-value pair
3737        new_entry_struct = exp.Struct(expressions=[exp.PropertyEQ(this=key, expression=value)])
3738        new_entry: exp.Expression = exp.ToMap(this=new_entry_struct)
3739
3740        # Use MAP_CONCAT to merge the original map with the new entry
3741        # This automatically handles both insert and update cases
3742        result = exp.func("MAP_CONCAT", map_arg, new_entry)
3743
3744        return self.sql(result)
3745
3746    def startswith_sql(self, expression: exp.StartsWith) -> str:
3747        return self.func(
3748            "STARTS_WITH",
3749            _cast_to_varchar(expression.this),
3750            _cast_to_varchar(expression.expression),
3751        )
3752
3753    def space_sql(self, expression: exp.Space) -> str:
3754        # DuckDB's REPEAT requires BIGINT for the count parameter
3755        return self.sql(
3756            exp.Repeat(
3757                this=exp.Literal.string(" "),
3758                times=exp.cast(expression.this, exp.DType.BIGINT),
3759            )
3760        )
3761
3762    def tablefromrows_sql(self, expression: exp.TableFromRows) -> str:
3763        # For GENERATOR, unwrap TABLE() - just emit the Generator (becomes RANGE)
3764        if isinstance(expression.this, exp.Generator):
3765            # Preserve alias, joins, and other table-level args
3766            table = exp.Table(
3767                this=expression.this,
3768                alias=expression.args.get("alias"),
3769                joins=expression.args.get("joins"),
3770            )
3771            return self.sql(table)
3772
3773        return super().tablefromrows_sql(expression)
3774
    def unnest_sql(self, expression: exp.Unnest) -> str:
        """Render UNNEST, special-casing BigQuery's array-exploding variant."""
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if isinstance(alias, exp.TableAlias):
                expression.set("alias", None)
                if alias.columns:
                    # Only the first column alias survives; it becomes the subquery's alias.
                    alias = exp.TableAlias(this=seq_get(alias.columns, 0))

            unnest_sql = super().unnest_sql(expression)
            # Wrap in a subquery so the max_depth-style UNNEST is legal in FROM position.
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)
3796
3797    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
3798        this = expression.this
3799
3800        if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
3801            # DuckDB should render IGNORE NULLS only for the general-purpose
3802            # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
3803            return super().ignorenulls_sql(expression)
3804
3805        if isinstance(this, exp.First):
3806            this = exp.AnyValue(this=this.this)
3807
3808        if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)):
3809            self.unsupported("IGNORE NULLS is not supported for non-window functions.")
3810
3811        return self.sql(this)
3812
3813    def split_sql(self, expression: exp.Split) -> str:
3814        base_func = exp.func("STR_SPLIT", expression.this, expression.expression)
3815
3816        case_expr = exp.case().else_(base_func)
3817        needs_case = False
3818
3819        if expression.args.get("null_returns_null"):
3820            case_expr = case_expr.when(expression.expression.is_(exp.null()), exp.null())
3821            needs_case = True
3822
3823        if expression.args.get("empty_delimiter_returns_whole"):
3824            # When delimiter is empty string, return input string as single array element
3825            array_with_input = exp.array(expression.this)
3826            case_expr = case_expr.when(
3827                expression.expression.eq(exp.Literal.string("")), array_with_input
3828            )
3829            needs_case = True
3830
3831        return self.sql(case_expr if needs_case else base_func)
3832
    def splitpart_sql(self, expression: exp.SplitPart) -> str:
        """Render SPLIT_PART, emulating source-dialect edge cases with CASE.

        Two optional (Snowflake-style) behaviors are handled when the
        corresponding args are set:
        - part_index_zero_as_one: index 0 behaves like index 1
        - empty_delimiter_returns_whole: an empty delimiter returns the whole
          string for index 1/-1 and the empty string otherwise
        Falls back to the generic function renderer when delimiter or
        part_index is missing.
        """
        string_arg = expression.this
        delimiter_arg = expression.args.get("delimiter")
        part_index_arg = expression.args.get("part_index")

        if delimiter_arg and part_index_arg:
            # Handle Snowflake's "index 0 and 1 both return first element" behavior
            if expression.args.get("part_index_zero_as_one"):
                # Convert 0 to 1 for compatibility

                part_index_arg = exp.Paren(
                    this=exp.case()
                    .when(part_index_arg.eq(exp.Literal.number("0")), exp.Literal.number("1"))
                    .else_(part_index_arg)
                )

            # Use Anonymous to avoid recursion
            base_func_expr: exp.Expr = exp.Anonymous(
                this="SPLIT_PART", expressions=[string_arg, delimiter_arg, part_index_arg]
            )
            needs_case_transform = False
            case_expr = exp.case().else_(base_func_expr)

            if expression.args.get("empty_delimiter_returns_whole"):
                # When delimiter is empty string:
                # - Return whole string if part_index is 1 or -1
                # - Return empty string otherwise
                empty_case = exp.Paren(
                    this=exp.case()
                    .when(
                        exp.or_(
                            part_index_arg.eq(exp.Literal.number("1")),
                            part_index_arg.eq(exp.Literal.number("-1")),
                        ),
                        string_arg,
                    )
                    .else_(exp.Literal.string(""))
                )

                case_expr = case_expr.when(delimiter_arg.eq(exp.Literal.string("")), empty_case)
                needs_case_transform = True

            """
            Output looks something like this:

            CASE
            WHEN delimiter is '' THEN
                (
                    CASE
                    WHEN adjusted_part_index = 1 OR adjusted_part_index = -1 THEN input
                    ELSE '' END
                )
            ELSE SPLIT_PART(input, delimiter, adjusted_part_index)
            END

            """
            return self.sql(case_expr if needs_case_transform else base_func_expr)

        return self.function_fallback_sql(expression)
3892
3893    def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
3894        if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
3895            # DuckDB should render RESPECT NULLS only for the general-purpose
3896            # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
3897            return super().respectnulls_sql(expression)
3898
3899        self.unsupported("RESPECT NULLS is not supported for non-window functions.")
3900        return self.sql(expression, "this")
3901
    def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
        """Render ARRAY_TO_STRING, emulating NULL-element handling variants.

        - null_is_empty: NULL elements become '' via LIST_TRANSFORM + COALESCE;
          with null_delim_is_null, a NULL delimiter makes the whole result NULL.
        - null (replacement value): NULL elements are COALESCE'd to that value.
        - otherwise: plain ARRAY_TO_STRING.
        """
        null = expression.args.get("null")

        if expression.args.get("null_is_empty"):
            x = exp.to_identifier("x")
            # x -> COALESCE(CAST(x AS TEXT), '') applied over the input list.
            list_transform = exp.Transform(
                this=expression.this.copy(),
                expression=exp.Lambda(
                    this=exp.Coalesce(
                        this=exp.cast(x, "TEXT"), expressions=[exp.Literal.string("")]
                    ),
                    expressions=[x],
                ),
            )
            array_to_string = exp.ArrayToString(
                this=list_transform, expression=expression.expression
            )
            if expression.args.get("null_delim_is_null"):
                return self.sql(
                    exp.case()
                    .when(expression.expression.copy().is_(exp.null()), exp.null())
                    .else_(array_to_string)
                )
            return self.sql(array_to_string)

        if null:
            x = exp.to_identifier("x")
            # x -> COALESCE(x, <null replacement>) applied over the input list.
            return self.sql(
                exp.ArrayToString(
                    this=exp.Transform(
                        this=expression.this,
                        expression=exp.Lambda(
                            this=exp.Coalesce(this=x, expressions=[null]),
                            expressions=[x],
                        ),
                    ),
                    expression=expression.expression,
                )
            )

        return self.func("ARRAY_TO_STRING", expression.this, expression.expression)
3943
3944    def concatws_sql(self, expression: exp.ConcatWs) -> str:
3945        # DuckDB-specific: handle binary types using DPipe (||) operator
3946        separator = seq_get(expression.expressions, 0)
3947        args = expression.expressions[1:]
3948
3949        if any(_is_binary(arg) for arg in [separator, *args]):
3950            result = args[0]
3951            for arg in args[1:]:
3952                result = exp.DPipe(
3953                    this=exp.DPipe(this=result, expression=separator), expression=arg
3954                )
3955            return self.sql(result)
3956
3957        return super().concatws_sql(expression)
3958
    def _regexp_extract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
        """Shared renderer for REGEXP_EXTRACT and REGEXP_EXTRACT_ALL.

        Emulates arguments DuckDB's functions do not take directly:
        - position: via SUBSTRING over the input
        - occurrence > 1: via REGEXP_EXTRACT_ALL plus array slicing/extraction
        - Snowflake's 'e' flag: dropped, since group extraction is controlled
          by the group parameter in DuckDB
        """
        this = expression.this
        group = expression.args.get("group")
        params = expression.args.get("parameters")
        position = expression.args.get("position")
        occurrence = expression.args.get("occurrence")
        null_if_pos_overflow = expression.args.get("null_if_pos_overflow")

        # Handle Snowflake's 'e' flag: it enables capture group extraction
        # In DuckDB, this is controlled by the group parameter directly
        if params and params.is_string and "e" in params.name:
            params = exp.Literal.string(params.name.replace("e", ""))

        validated_flags = self._validate_regexp_flags(params, supported_flags="cims")

        # Strip default group when no following params (DuckDB default is same as group=0)
        if (
            not validated_flags
            and group
            and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
        ):
            group = None

        flags_expr = exp.Literal.string(validated_flags) if validated_flags else None

        # use substring to handle position argument
        if position and (not position.is_int or position.to_py() > 1):
            this = exp.Substring(this=this, start=position)

            if null_if_pos_overflow:
                # SUBSTRING past the end yields '', which NULLIF converts to NULL.
                this = exp.Nullif(this=this, expression=exp.Literal.string(""))

        is_extract_all = isinstance(expression, exp.RegexpExtractAll)
        non_single_occurrence = occurrence and (not occurrence.is_int or occurrence.to_py() > 1)

        if is_extract_all or non_single_occurrence:
            name = "REGEXP_EXTRACT_ALL"
        else:
            name = "REGEXP_EXTRACT"

        # Anonymous avoids re-dispatching into this method recursively.
        result: exp.Expr = exp.Anonymous(
            this=name, expressions=[this, expression.expression, group, flags_expr]
        )

        # Array slicing for REGEXP_EXTRACT_ALL with occurrence
        if is_extract_all and non_single_occurrence:
            result = exp.Bracket(this=result, expressions=[exp.Slice(this=occurrence)])
        # ARRAY_EXTRACT for REGEXP_EXTRACT with occurrence > 1
        elif non_single_occurrence:
            result = exp.Anonymous(this="ARRAY_EXTRACT", expressions=[result, occurrence])

        return self.sql(result)
4011
    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        """Delegate to the shared REGEXP_EXTRACT/REGEXP_EXTRACT_ALL renderer."""
        return self._regexp_extract_sql(expression)
4014
    def regexpextractall_sql(self, expression: exp.RegexpExtractAll) -> str:
        """Delegate to the shared REGEXP_EXTRACT/REGEXP_EXTRACT_ALL renderer."""
        return self._regexp_extract_sql(expression)
4017
    def regexpinstr_sql(self, expression: exp.RegexpInstr) -> str:
        """Emulate REGEXP_INSTR (no DuckDB builtin) with split/extract length arithmetic.

        The match position is reconstructed as:
            1 + sum(lengths of the first `occurrence` split pieces)
              + sum(lengths of the first `occurrence - 1` matches)
              + starting-position offset
        With option=1 the matched text's length is added too (end position).
        NULL arguments yield NULL; an empty pattern or too few matches yield 0.
        """
        this = expression.this
        pattern = expression.expression
        position = expression.args.get("position")
        orig_occ = expression.args.get("occurrence")
        # Default occurrence is 1; orig_occ is kept separately for the NULL checks.
        occurrence = orig_occ or exp.Literal.number(1)
        option = expression.args.get("option")
        parameters = expression.args.get("parameters")

        # Supported flags are folded into the pattern as an inline (?ims) group.
        validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims")
        if validated_flags:
            pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern])

        # Handle starting position offset
        pos_offset: exp.Expr = exp.Literal.number(0)
        if position and (not position.is_int or position.to_py() > 1):
            this = exp.Substring(this=this, start=position)
            pos_offset = position - exp.Literal.number(1)

        # Helper: LIST_SUM(LIST_TRANSFORM(list[1:end], x -> LENGTH(x)))
        def sum_lengths(func_name: str, end: exp.Expr) -> exp.Expr:
            lst = exp.Bracket(
                this=exp.Anonymous(this=func_name, expressions=[this, pattern]),
                expressions=[exp.Slice(this=exp.Literal.number(1), expression=end)],
                offset=1,
            )
            transform = exp.Anonymous(
                this="LIST_TRANSFORM",
                expressions=[
                    lst,
                    exp.Lambda(
                        this=exp.Length(this=exp.to_identifier("x")),
                        expressions=[exp.to_identifier("x")],
                    ),
                ],
            )
            # COALESCE to 0 so an empty slice contributes nothing to the sum.
            return exp.Coalesce(
                this=exp.Anonymous(this="LIST_SUM", expressions=[transform]),
                expressions=[exp.Literal.number(0)],
            )

        # Position = 1 + sum(split_lengths[1:occ]) + sum(match_lengths[1:occ-1]) + offset
        base_pos: exp.Expr = (
            exp.Literal.number(1)
            + sum_lengths("STRING_SPLIT_REGEX", occurrence)
            + sum_lengths("REGEXP_EXTRACT_ALL", occurrence - exp.Literal.number(1))
            + pos_offset
        )

        # option=1: add match length for end position
        if option and option.is_int and option.to_py() == 1:
            match_at_occ = exp.Bracket(
                this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern]),
                expressions=[occurrence],
                offset=1,
            )
            base_pos = base_pos + exp.Coalesce(
                this=exp.Length(this=match_at_occ), expressions=[exp.Literal.number(0)]
            )

        # NULL checks for all provided arguments
        # .copy() is used strictly because .is_() alters the node's parent pointer, mutating the parsed AST
        null_args = [
            expression.this,
            expression.expression,
            position,
            orig_occ,
            option,
            parameters,
        ]
        null_checks = [arg.copy().is_(exp.Null()) for arg in null_args if arg]

        matches = exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern])

        return self.sql(
            exp.case()
            .when(exp.or_(*null_checks), exp.Null())
            .when(pattern.copy().eq(exp.Literal.string("")), exp.Literal.number(0))
            .when(exp.Length(this=matches) < occurrence, exp.Literal.number(0))
            .else_(base_pos)
        )
4099
4100    @unsupported_args("culture")
4101    def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
4102        fmt = expression.args.get("format")
4103        if fmt and fmt.is_int:
4104            return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)
4105
4106        self.unsupported("Only integer formats are supported by NumberToStr")
4107        return self.function_fallback_sql(expression)
4108
    def autoincrementcolumnconstraint_sql(self, _) -> str:
        """AUTOINCREMENT has no DuckDB equivalent; warn and emit nothing."""
        self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
        return ""
4112
4113    def aliases_sql(self, expression: exp.Aliases) -> str:
4114        this = expression.this
4115        if isinstance(this, exp.Posexplode):
4116            return self.posexplode_sql(this)
4117
4118        return super().aliases_sql(expression)
4119
    def posexplode_sql(self, expression: exp.Posexplode) -> str:
        """Transpile Spark's POSEXPLODE to GENERATE_SUBSCRIPTS(...) - 1 plus UNNEST."""
        this = expression.this
        parent = expression.parent

        # The default Spark aliases are "pos" and "col", unless specified otherwise
        pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

        if isinstance(parent, exp.Aliases):
            # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
            pos, col = parent.expressions
        elif isinstance(parent, exp.Table):
            # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
            alias = parent.args.get("alias")
            if alias:
                pos, col = alias.columns or [pos, col]
                # The table alias is consumed here, so remove it from the parent.
                alias.pop()

        # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
        # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
        unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
        gen_subscripts = self.sql(
            exp.Alias(
                this=exp.Anonymous(
                    this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                )
                - exp.Literal.number(1),
                alias=pos,
            )
        )

        posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

        if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
            # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
            return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

        return posexplode_sql
4157
    def addmonths_sql(self, expression: exp.AddMonths) -> str:
        """
        Handles three key issues:
        1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
        2. End-of-month preservation: If input is last day of month, result is last day of result month
        3. Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
        """
        from sqlglot.optimizer.annotate_types import annotate_types

        this = expression.this
        # Annotate on demand so the type checks below are meaningful.
        if not this.type:
            this = annotate_types(this, dialect=self.dialect)

        # Text inputs are coerced to TIMESTAMP before date arithmetic.
        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Cast(this=this, to=exp.DataType(this=exp.DType.TIMESTAMP))

        # Detect float/decimal months to apply rounding (Snowflake behavior)
        # DuckDB INTERVAL syntax doesn't support non-integer expressions, so use TO_MONTHS
        months_expr = expression.expression
        if not months_expr.type:
            months_expr = annotate_types(months_expr, dialect=self.dialect)

        # Build interval or to_months expression based on type
        # Float/decimal case: Round and use TO_MONTHS(CAST(ROUND(value) AS INT))
        interval_or_to_months = (
            exp.func("TO_MONTHS", exp.cast(exp.func("ROUND", months_expr), "INT"))
            if months_expr.is_type(
                exp.DType.FLOAT,
                exp.DType.DOUBLE,
                exp.DType.DECIMAL,
            )
            # Integer case: standard INTERVAL N MONTH syntax
            else exp.Interval(this=months_expr, unit=exp.var("MONTH"))
        )

        date_add_expr = exp.Add(this=this, expression=interval_or_to_months)

        # Apply end-of-month preservation if Snowflake flag is set
        # CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(result) ELSE result END
        preserve_eom = expression.args.get("preserve_end_of_month")
        result_expr = (
            exp.case()
            .when(
                exp.EQ(this=exp.func("LAST_DAY", this), expression=this),
                exp.func("LAST_DAY", date_add_expr),
            )
            .else_(date_add_expr)
            if preserve_eom
            else date_add_expr
        )

        # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
        # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
        # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
        # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
        if this.is_type(exp.DType.DATE, exp.DType.TIMESTAMPTZ):
            return self.sql(exp.Cast(this=result_expr, to=this.type))
        return self.sql(result_expr)
4216
4217    def format_sql(self, expression: exp.Format) -> str:
4218        if expression.name.lower() == "%s" and len(expression.expressions) == 1:
4219            return self.func("FORMAT", "'{}'", expression.expressions[0])
4220
4221        return self.function_fallback_sql(expression)
4222
    def hexstring_sql(
        self, expression: exp.HexString, binary_function_repr: str | None = None
    ) -> str:
        """Render hex string literals, forcing UNHEX regardless of the caller-provided repr."""
        # UNHEX('FF') correctly produces blob \xFF in DuckDB
        return super().hexstring_sql(expression, binary_function_repr="UNHEX")
4228
    def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
        """Render DATE_TRUNC, with custom week-start handling and optional
        preservation of the input's temporal type."""
        unit = expression.args.get("unit")
        date = expression.this

        # A week unit can imply a custom first day of week; _week_unit_to_dow
        # returns that start day, or a falsy value for non-week units.
        week_start = _week_unit_to_dow(unit)
        unit = unit_to_str(expression)

        if week_start:
            result = self.sql(
                _build_week_trunc_expression(date, week_start, preserve_start_day=True)
            )
        else:
            result = self.func("DATE_TRUNC", unit, date)

        # Cast back to the input type when requested, except when truncating a
        # DATE by a date unit (the result already has the right type).
        if (
            expression.args.get("input_type_preserved")
            and date.is_type(*exp.DataType.TEMPORAL_TYPES)
            and not (is_date_unit(unit) and date.is_type(exp.DType.DATE))
        ):
            return self.sql(exp.Cast(this=result, to=date.type))

        return result
4251
    def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
        """Render TIMESTAMP_TRUNC as DATE_TRUNC, handling timezones and type preservation."""
        unit = unit_to_str(expression)
        zone = expression.args.get("zone")
        timestamp = expression.this
        date_unit = is_date_unit(unit)

        if date_unit and zone:
            # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC.
            # Double AT TIME ZONE needed for BigQuery compatibility:
            # 1. First AT TIME ZONE: ensures truncation happens in the target timezone
            # 2. Second AT TIME ZONE: converts the DATE result back to TIMESTAMPTZ (preserving time component)
            timestamp = exp.AtTimeZone(this=timestamp, zone=zone)
            result_sql = self.func("DATE_TRUNC", unit, timestamp)
            return self.sql(exp.AtTimeZone(this=result_sql, zone=zone))

        result = self.func("DATE_TRUNC", unit, timestamp)
        if expression.args.get("input_type_preserved"):
            # TIME has no date part to truncate against, so it is anchored to a
            # dummy date, truncated, and cast back to the original TIME type.
            if timestamp.type and timestamp.is_type(exp.DType.TIME, exp.DType.TIMETZ):
                dummy_date = exp.Cast(
                    this=exp.Literal.string("1970-01-01"),
                    to=exp.DataType(this=exp.DType.DATE),
                )
                date_time = exp.Add(this=dummy_date, expression=timestamp)
                result = self.func("DATE_TRUNC", unit, date_time)
                return self.sql(exp.Cast(this=result, to=timestamp.type))

            if timestamp.is_type(*exp.DataType.TEMPORAL_TYPES) and not (
                date_unit and timestamp.is_type(exp.DType.DATE)
            ):
                return self.sql(exp.Cast(this=result, to=timestamp.type))

        return result
4284
4285    def trim_sql(self, expression: exp.Trim) -> str:
4286        expression.this.replace(_cast_to_varchar(expression.this))
4287        if expression.expression:
4288            expression.expression.replace(_cast_to_varchar(expression.expression))
4289
4290        result_sql = super().trim_sql(expression)
4291        return _gen_with_cast_to_blob(self, expression, result_sql)
4292
4293    def round_sql(self, expression: exp.Round) -> str:
4294        this = expression.this
4295        decimals = expression.args.get("decimals")
4296        truncate = expression.args.get("truncate")
4297
4298        # DuckDB requires the scale (decimals) argument to be an INT
4299        # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally
4300        if decimals is not None and expression.args.get("casts_non_integer_decimals"):
4301            if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
4302                decimals = exp.cast(decimals, exp.DType.INT)
4303
4304        func = "ROUND"
4305        if truncate:
4306            # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN
4307            if truncate.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"):
4308                func = "ROUND_EVEN"
4309                truncate = None
4310            # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO
4311            elif truncate.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"):
4312                truncate = None
4313
4314        return self.func(func, this, decimals, truncate)
4315
    def strtok_sql(self, expression: exp.Strtok) -> str:
        """Emulate STRTOK: split on any delimiter character, skip empty tokens,
        and index into the result (falls back to the generic renderer when the
        delimiter or part index is missing)."""
        string_arg = expression.this
        delimiter_arg = expression.args.get("delimiter")
        part_index_arg = expression.args.get("part_index")

        if delimiter_arg and part_index_arg:
            # Escape regex chars and build character class at runtime using REGEXP_REPLACE
            escaped_delimiter = exp.Anonymous(
                this="REGEXP_REPLACE",
                expressions=[
                    delimiter_arg,
                    exp.Literal.string(
                        r"([\[\]^.\-*+?(){}|$\\])"
                    ),  # Escape problematic regex chars
                    exp.Literal.string(
                        r"\\\1"
                    ),  # Replace with escaped version using $1 backreference
                    exp.Literal.string("g"),  # Global flag
                ],
            )
            # CASE WHEN delimiter = '' THEN '' ELSE CONCAT('[', escaped_delimiter, ']') END
            regex_pattern = (
                exp.case()
                .when(delimiter_arg.eq(exp.Literal.string("")), exp.Literal.string(""))
                .else_(
                    exp.func(
                        "CONCAT",
                        exp.Literal.string("["),
                        escaped_delimiter,
                        exp.Literal.string("]"),
                    )
                )
            )

            # STRTOK skips empty strings, so we need to filter them out
            # LIST_FILTER(REGEXP_SPLIT_TO_ARRAY(string, pattern), x -> x != '')[index]
            split_array = exp.func("REGEXP_SPLIT_TO_ARRAY", string_arg, regex_pattern)
            x = exp.to_identifier("x")
            is_empty = x.eq(exp.Literal.string(""))
            filtered_array = exp.func(
                "LIST_FILTER",
                split_array,
                exp.Lambda(this=exp.not_(is_empty.copy()), expressions=[x.copy()]),
            )
            # 1-based indexing into the filtered token list.
            base_func = exp.Bracket(
                this=filtered_array,
                expressions=[part_index_arg],
                offset=1,
            )

            # Use template with the built regex pattern
            result = exp.replace_placeholders(
                self.STRTOK_TEMPLATE.copy(),
                string=string_arg,
                delimiter=delimiter_arg,
                part_index=part_index_arg,
                base_func=base_func,
            )

            return self.sql(result)

        return self.function_fallback_sql(expression)
4378
4379    def strtoktoarray_sql(self, expression: exp.StrtokToArray) -> str:
4380        string_arg = expression.this
4381        delimiter_arg = expression.args.get("expression") or exp.Literal.string(" ")
4382
4383        escaped = exp.RegexpReplace(
4384            this=delimiter_arg.copy(),
4385            expression=exp.Literal.string(r"([\[\]^.\-*+?(){}|$\\])"),
4386            replacement=exp.Literal.string(r"\\\1"),
4387            modifiers=exp.Literal.string("g"),
4388        )
4389        return self.sql(
4390            exp.replace_placeholders(
4391                self.STRTOK_TO_ARRAY_TEMPLATE.copy(),
4392                string=string_arg,
4393                delimiter=delimiter_arg,
4394                escaped=escaped,
4395            )
4396        )
4397
4398    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
4399        result = self.func("APPROX_QUANTILE", expression.this, expression.args.get("quantile"))
4400
4401        # DuckDB returns integers for APPROX_QUANTILE, cast to DOUBLE if the expected type is a real type
4402        if expression.is_type(*exp.DataType.REAL_TYPES):
4403            result = f"CAST({result} AS DOUBLE)"
4404
4405        return result
4406
4407    def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
4408        """
4409        BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values
4410        dividing the input distribution into n equal-sized buckets.
4411
4412        Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery
4413        does not document the specific algorithm used so results may differ. DuckDB does not
4414        support RESPECT NULLS.
4415        """
4416        this = expression.this
4417        if isinstance(this, exp.Distinct):
4418            # APPROX_QUANTILES requires 2 args and DISTINCT node grabs both
4419            if len(this.expressions) < 2:
4420                self.unsupported("APPROX_QUANTILES requires a bucket count argument")
4421                return self.function_fallback_sql(expression)
4422            num_quantiles_expr = this.expressions[1].pop()
4423        else:
4424            num_quantiles_expr = expression.expression
4425
4426        if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int:
4427            self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
4428            return self.function_fallback_sql(expression)
4429
4430        num_quantiles = t.cast(int, num_quantiles_expr.to_py())
4431        if num_quantiles <= 0:
4432            self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
4433            return self.function_fallback_sql(expression)
4434
4435        quantiles = [
4436            exp.Literal.number(Decimal(i) / Decimal(num_quantiles))
4437            for i in range(num_quantiles + 1)
4438        ]
4439
4440        return self.sql(exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles)))
4441
4442    def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str:
4443        if expression.args.get("scalar_only"):
4444            expression = exp.JSONExtractScalar(
4445                this=rename_func("JSON_VALUE")(self, expression), expression="'$'"
4446            )
4447        return _arrow_json_extract_sql(self, expression)
4448
    def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str:
        """Render bitwise NOT (~), routing binary operands through a BIT cast.

        NOTE: the mutations below are order-sensitive — the type is tagged
        before the operand is swapped, and `_gen_with_cast_to_blob` consults
        that tag when producing the final SQL.
        """
        this = expression.this

        # Tag the result type as BINARY so the final wrapper can cast back to
        # BLOB — presumably `_is_binary`/`_gen_with_cast_to_blob` cooperate on
        # this tag; confirm against those helpers.
        if _is_binary(this):
            expression.type = exp.DType.BINARY.into_expr()

        # Cast the operand to BIT so `~` operates on a bitstring.
        arg = _cast_to_bit(this)

        # Parenthesize a negated operand so we emit e.g. ~(-x) rather than ~-x.
        if isinstance(this, exp.Neg):
            arg = exp.Paren(this=arg)

        # Swap the (possibly cast/parenthesized) operand back into the node
        # before rendering it.
        expression.set("this", arg)

        result_sql = f"~{self.sql(expression, 'this')}"

        return _gen_with_cast_to_blob(self, expression, result_sql)
4465
4466    def window_sql(self, expression: exp.Window) -> str:
4467        this = expression.this
4468        if isinstance(this, exp.Corr) or (
4469            isinstance(this, exp.Filter) and isinstance(this.this, exp.Corr)
4470        ):
4471            return self._corr_sql(expression)
4472
4473        return super().window_sql(expression)
4474
4475    def filter_sql(self, expression: exp.Filter) -> str:
4476        if isinstance(expression.this, exp.Corr):
4477            return self._corr_sql(expression)
4478
4479        return super().filter_sql(expression)
4480
4481    def _corr_sql(
4482        self,
4483        expression: exp.Filter | exp.Window | exp.Corr,
4484    ) -> str:
4485        if isinstance(expression, exp.Corr) and not expression.args.get("null_on_zero_variance"):
4486            return self.func("CORR", expression.this, expression.expression)
4487
4488        corr_expr = _maybe_corr_null_to_false(expression)
4489        if corr_expr is None:
4490            if isinstance(expression, exp.Window):
4491                return super().window_sql(expression)
4492            if isinstance(expression, exp.Filter):
4493                return super().filter_sql(expression)
4494            corr_expr = expression  # make mypy happy
4495
4496        return self.sql(exp.case().when(exp.IsNan(this=corr_expr), exp.null()).else_(corr_expr))
4497
4498    def uuid_sql(self, expression: exp.Uuid) -> str:
4499        namespace = expression.this
4500        name = expression.args.get("name")
4501
4502        # UUID v5 (namespace + name) - Emulate using SHA1
4503        if namespace and name:
4504            result = exp.replace_placeholders(
4505                self.UUID_V5_TEMPLATE.copy(),
4506                namespace=namespace,
4507                name=name,
4508            )
4509            return self.sql(result)
4510
4511        return super().uuid_sql(expression)
TIMEZONE_PATTERN = re.compile(':\\d{2}.*?[+\\-]\\d{2}(?::\\d{2})?')
REGEX_ESCAPE_REPLACEMENTS = {'\\': '\\\\', '-': '\\-', '^': '\\^', '[': '\\[', ']': '\\]'}
RANDSTR_CHAR_POOL = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
RANDSTR_SEED = 123456
WS_CONTROL_CHARS_TO_DUCK = {'\x0b': 11, '\x1c': 28, '\x1d': 29, '\x1e': 30, '\x1f': 31}
WEEK_START_DAY_TO_DOW = {'MONDAY': 1, 'TUESDAY': 2, 'WEDNESDAY': 3, 'THURSDAY': 4, 'FRIDAY': 5, 'SATURDAY': 6, 'SUNDAY': 7}
MAX_BIT_POSITION = exp.Literal.number(32768)
WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In, exp.Not)
class DuckDBGenerator(sqlglot.generator.Generator):
1456class DuckDBGenerator(generator.Generator):
1457    PARAMETER_TOKEN = "$"
1458    NAMED_PLACEHOLDER_TOKEN = "$"
1459    JOIN_HINTS = False
1460    TABLE_HINTS = False
1461    QUERY_HINTS = False
1462    LIMIT_FETCH = "LIMIT"
1463    STRUCT_DELIMITER = ("(", ")")
1464    RENAME_TABLE_WITH_DB = False
1465    NVL2_SUPPORTED = False
1466    SEMI_ANTI_JOIN_WITH_SIDE = False
1467    TABLESAMPLE_KEYWORDS = "USING SAMPLE"
1468    TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
1469    LAST_DAY_SUPPORTS_DATE_PART = False
1470    JSON_KEY_VALUE_PAIR_SEP = ","
1471    IGNORE_NULLS_IN_FUNC = True
1472    IGNORE_NULLS_BEFORE_ORDER = False
1473    JSON_PATH_BRACKETED_KEY_SUPPORTED = False
1474    SUPPORTS_CREATE_TABLE_LIKE = False
1475    MULTI_ARG_DISTINCT = False
1476    CAN_IMPLEMENT_ARRAY_ANY = True
1477    SUPPORTS_TO_NUMBER = False
1478    SELECT_KINDS: tuple[str, ...] = ()
1479    SUPPORTS_DECODE_CASE = False
1480    SUPPORTS_DROP_ALTER_ICEBERG_PROPERTY = False
1481
1482    AFTER_HAVING_MODIFIER_TRANSFORMS = generator.AFTER_HAVING_MODIFIER_TRANSFORMS
1483    SUPPORTS_WINDOW_EXCLUDE = True
1484    COPY_HAS_INTO_KEYWORD = False
1485    STAR_EXCEPT = "EXCLUDE"
1486    PAD_FILL_PATTERN_IS_REQUIRED = True
1487    ARRAY_SIZE_DIM_REQUIRED: bool | None = False
1488    NORMALIZE_EXTRACT_DATE_PARTS = True
1489    SUPPORTS_LIKE_QUANTIFIERS = False
1490    SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True
1491
1492    TRANSFORMS = {
1493        **generator.Generator.TRANSFORMS,
1494        exp.AnyValue: _anyvalue_sql,
1495        exp.ApproxDistinct: approx_count_distinct_sql,
1496        exp.Boolnot: _boolnot_sql,
1497        exp.Booland: _booland_sql,
1498        exp.Boolor: _boolor_sql,
1499        exp.Array: transforms.preprocess(
1500            [transforms.inherit_struct_field_names],
1501            generator=inline_array_unless_query,
1502        ),
1503        exp.ArrayAppend: array_append_sql("LIST_APPEND"),
1504        exp.ArrayCompact: array_compact_sql,
1505        exp.ArrayConstructCompact: lambda self, e: self.sql(
1506            exp.ArrayCompact(this=exp.Array(expressions=e.expressions))
1507        ),
1508        exp.ArrayConcat: array_concat_sql("LIST_CONCAT"),
1509        exp.ArrayContains: _array_contains_sql,
1510        exp.ArrayOverlaps: _array_overlaps_sql,
1511        exp.ArrayFilter: rename_func("LIST_FILTER"),
1512        exp.ArrayInsert: _array_insert_sql,
1513        exp.ArrayPosition: lambda self, e: (
1514            self.sql(
1515                exp.Sub(
1516                    this=exp.ArrayPosition(this=e.this, expression=e.expression),
1517                    expression=exp.Literal.number(1),
1518                )
1519            )
1520            if e.args.get("zero_based")
1521            else self.func("ARRAY_POSITION", e.this, e.expression)
1522        ),
1523        exp.ArrayRemoveAt: _array_remove_at_sql,
1524        exp.ArrayRemove: remove_from_array_using_filter,
1525        exp.ArraySort: _array_sort_sql,
1526        exp.ArrayPrepend: array_append_sql("LIST_PREPEND", swap_params=True),
1527        exp.ArraySum: rename_func("LIST_SUM"),
1528        exp.ArrayMax: rename_func("LIST_MAX"),
1529        exp.ArrayMin: rename_func("LIST_MIN"),
1530        exp.Base64DecodeBinary: lambda self, e: _base64_decode_sql(self, e, to_string=False),
1531        exp.Base64DecodeString: lambda self, e: _base64_decode_sql(self, e, to_string=True),
1532        exp.BitwiseAnd: lambda self, e: self._bitwise_op(e, "&"),
1533        exp.BitwiseAndAgg: _bitwise_agg_sql,
1534        exp.BitwiseCount: rename_func("BIT_COUNT"),
1535        exp.BitwiseLeftShift: _bitshift_sql,
1536        exp.BitwiseOr: lambda self, e: self._bitwise_op(e, "|"),
1537        exp.BitwiseOrAgg: _bitwise_agg_sql,
1538        exp.BitwiseRightShift: _bitshift_sql,
1539        exp.BitwiseXorAgg: _bitwise_agg_sql,
1540        exp.CommentColumnConstraint: no_comment_column_constraint_sql,
1541        exp.Corr: lambda self, e: self._corr_sql(e),
1542        exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"),
1543        exp.CurrentTime: lambda *_: "CURRENT_TIME",
1544        exp.CurrentSchemas: lambda self, e: self.func(
1545            "current_schemas", e.this if e.this else exp.true()
1546        ),
1547        exp.CurrentTimestamp: lambda self, e: (
1548            self.sql(
1549                exp.AtTimeZone(this=exp.var("CURRENT_TIMESTAMP"), zone=exp.Literal.string("UTC"))
1550            )
1551            if e.args.get("sysdate")
1552            else "CURRENT_TIMESTAMP"
1553        ),
1554        exp.CurrentVersion: rename_func("version"),
1555        exp.Localtime: unsupported_args("this")(lambda *_: "LOCALTIME"),
1556        exp.DayOfMonth: rename_func("DAYOFMONTH"),
1557        exp.DayOfWeek: rename_func("DAYOFWEEK"),
1558        exp.DayOfWeekIso: rename_func("ISODOW"),
1559        exp.DayOfYear: rename_func("DAYOFYEAR"),
1560        exp.Dayname: lambda self, e: (
1561            self.func("STRFTIME", e.this, exp.Literal.string("%a"))
1562            if e.args.get("abbreviated")
1563            else self.func("DAYNAME", e.this)
1564        ),
1565        exp.Monthname: lambda self, e: (
1566            self.func("STRFTIME", e.this, exp.Literal.string("%b"))
1567            if e.args.get("abbreviated")
1568            else self.func("MONTHNAME", e.this)
1569        ),
1570        exp.DataType: _datatype_sql,
1571        exp.Date: _date_sql,
1572        exp.DateAdd: _date_delta_to_binary_interval_op(),
1573        exp.DateFromParts: _date_from_parts_sql,
1574        exp.DateSub: _date_delta_to_binary_interval_op(),
1575        exp.DateDiff: _date_diff_sql,
1576        exp.DateStrToDate: datestrtodate_sql,
1577        exp.Datetime: no_datetime_sql,
1578        exp.DatetimeDiff: _date_diff_sql,
1579        exp.DatetimeSub: _date_delta_to_binary_interval_op(),
1580        exp.DatetimeAdd: _date_delta_to_binary_interval_op(),
1581        exp.DateToDi: lambda self, e: (
1582            f"CAST(STRFTIME({self.sql(e, 'this')}, {self.dialect.DATEINT_FORMAT}) AS INT)"
1583        ),
1584        exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
1585        exp.HexDecodeString: lambda self, e: self.sql(exp.Decode(this=exp.Unhex(this=e.this))),
1586        exp.DiToDate: lambda self, e: (
1587            f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {self.dialect.DATEINT_FORMAT}) AS DATE)"
1588        ),
1589        exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
1590        exp.EqualNull: lambda self, e: self.sql(
1591            exp.NullSafeEQ(this=e.this, expression=e.expression)
1592        ),
1593        exp.EuclideanDistance: rename_func("LIST_DISTANCE"),
1594        exp.GenerateDateArray: _generate_datetime_array_sql,
1595        exp.GenerateSeries: generate_series_sql("GENERATE_SERIES", "RANGE"),
1596        exp.GenerateTimestampArray: _generate_datetime_array_sql,
1597        exp.Getbit: getbit_sql,
1598        exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
1599        exp.Explode: rename_func("UNNEST"),
1600        exp.IcebergProperty: lambda *_: "",
1601        exp.IntDiv: lambda self, e: self.binary(e, "//"),
1602        exp.IsInf: rename_func("ISINF"),
1603        exp.IsNan: rename_func("ISNAN"),
1604        exp.IsNullValue: lambda self, e: self.sql(
1605            exp.func("JSON_TYPE", e.this).eq(exp.Literal.string("NULL"))
1606        ),
1607        exp.IsArray: lambda self, e: self.sql(
1608            exp.func("JSON_TYPE", e.this).eq(exp.Literal.string("ARRAY"))
1609        ),
1610        exp.Ceil: _ceil_floor,
1611        exp.Floor: _ceil_floor,
1612        exp.JSONBExists: rename_func("JSON_EXISTS"),
1613        exp.JSONExtract: _arrow_json_extract_sql,
1614        exp.JSONExtractArray: _json_extract_value_array_sql,
1615        exp.JSONFormat: _json_format_sql,
1616        exp.JSONValueArray: _json_extract_value_array_sql,
1617        exp.Lateral: _explode_to_unnest_sql,
1618        exp.LogicalOr: lambda self, e: self.func("BOOL_OR", _cast_to_boolean(e.this)),
1619        exp.LogicalAnd: lambda self, e: self.func("BOOL_AND", _cast_to_boolean(e.this)),
1620        exp.Select: transforms.preprocess([_seq_to_range_in_generator]),
1621        exp.Seq1: lambda self, e: _seq_sql(self, e, 1),
1622        exp.Seq2: lambda self, e: _seq_sql(self, e, 2),
1623        exp.Seq4: lambda self, e: _seq_sql(self, e, 4),
1624        exp.Seq8: lambda self, e: _seq_sql(self, e, 8),
1625        exp.BoolxorAgg: _boolxor_agg_sql,
1626        exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
1627        exp.Initcap: _initcap_sql,
1628        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
1629        exp.SHA: lambda self, e: _sha_sql(self, e, "SHA1"),
1630        exp.SHA1Digest: lambda self, e: _sha_sql(self, e, "SHA1", is_binary=True),
1631        exp.SHA2: lambda self, e: _sha_sql(self, e, "SHA256"),
1632        exp.SHA2Digest: lambda self, e: _sha_sql(self, e, "SHA256", is_binary=True),
1633        exp.MonthsBetween: months_between_sql,
1634        exp.NextDay: _day_navigation_sql,
1635        exp.PercentileCont: rename_func("QUANTILE_CONT"),
1636        exp.PercentileDisc: rename_func("QUANTILE_DISC"),
1637        # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
1638        # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
1639        exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
1640        exp.PreviousDay: _day_navigation_sql,
1641        exp.RegexpILike: lambda self, e: self.func(
1642            "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
1643        ),
1644        exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
1645        exp.RegrValx: _regr_val_sql,
1646        exp.RegrValy: _regr_val_sql,
1647        exp.Return: lambda self, e: self.sql(e, "this"),
1648        exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
1649        exp.StrToUnix: lambda self, e: self.func(
1650            "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
1651        ),
1652        exp.Struct: _struct_sql,
1653        exp.Transform: rename_func("LIST_TRANSFORM"),
1654        exp.TimeAdd: _date_delta_to_binary_interval_op(),
1655        exp.TimeSub: _date_delta_to_binary_interval_op(),
1656        exp.Time: no_time_sql,
1657        exp.TimeDiff: _timediff_sql,
1658        exp.Timestamp: no_timestamp_sql,
1659        exp.TimestampAdd: _date_delta_to_binary_interval_op(),
1660        exp.TimestampDiff: lambda self, e: self.func(
1661            "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
1662        ),
1663        exp.TimestampSub: _date_delta_to_binary_interval_op(),
1664        exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DType.DATE)),
1665        exp.TimeStrToTime: timestrtotime_sql,
1666        exp.TimeStrToUnix: lambda self, e: self.func(
1667            "EPOCH", exp.cast(e.this, exp.DType.TIMESTAMP)
1668        ),
1669        exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
1670        exp.ToBoolean: _to_boolean_sql,
1671        exp.ToVariant: lambda self, e: self.sql(
1672            exp.cast(e.this, exp.DataType.from_str("VARIANT", dialect="duckdb"))
1673        ),
1674        exp.TimeToUnix: rename_func("EPOCH"),
1675        exp.TsOrDiToDi: lambda self, e: (
1676            f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)"
1677        ),
1678        exp.TsOrDsAdd: _date_delta_to_binary_interval_op(),
1679        exp.TsOrDsDiff: lambda self, e: self.func(
1680            "DATE_DIFF",
1681            f"'{e.args.get('unit') or 'DAY'}'",
1682            exp.cast(e.expression, exp.DType.TIMESTAMP),
1683            exp.cast(e.this, exp.DType.TIMESTAMP),
1684        ),
1685        exp.UnixMicros: lambda self, e: self.func("EPOCH_US", _implicit_datetime_cast(e.this)),
1686        exp.UnixMillis: lambda self, e: self.func("EPOCH_MS", _implicit_datetime_cast(e.this)),
1687        exp.UnixSeconds: lambda self, e: self.sql(
1688            exp.cast(self.func("EPOCH", _implicit_datetime_cast(e.this)), exp.DType.BIGINT)
1689        ),
1690        exp.UnixToStr: lambda self, e: self.func(
1691            "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
1692        ),
1693        exp.DatetimeTrunc: lambda self, e: self.func(
1694            "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DType.DATETIME)
1695        ),
1696        exp.UnixToTime: _unix_to_time_sql,
1697        exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
1698        exp.VariancePop: rename_func("VAR_POP"),
1699        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
1700        exp.YearOfWeek: lambda self, e: self.sql(
1701            exp.Extract(
1702                this=exp.Var(this="ISOYEAR"),
1703                expression=e.this,
1704            )
1705        ),
1706        exp.YearOfWeekIso: lambda self, e: self.sql(
1707            exp.Extract(
1708                this=exp.Var(this="ISOYEAR"),
1709                expression=e.this,
1710            )
1711        ),
1712        exp.Xor: _xor_sql,
1713        exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
1714        exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
1715        exp.DateBin: rename_func("TIME_BUCKET"),
1716        exp.LastDay: _last_day_sql,
1717    }
1718
1719    SUPPORTED_JSON_PATH_PARTS = {
1720        exp.JSONPathKey,
1721        exp.JSONPathRoot,
1722        exp.JSONPathSubscript,
1723        exp.JSONPathWildcard,
1724    }
1725
1726    TYPE_MAPPING = {
1727        **generator.Generator.TYPE_MAPPING,
1728        exp.DType.BINARY: "BLOB",
1729        exp.DType.BPCHAR: "TEXT",
1730        exp.DType.CHAR: "TEXT",
1731        exp.DType.DATETIME: "TIMESTAMP",
1732        exp.DType.DECFLOAT: "DECIMAL",
1733        exp.DType.FLOAT: "REAL",
1734        exp.DType.JSONB: "JSON",
1735        exp.DType.NCHAR: "TEXT",
1736        exp.DType.NVARCHAR: "TEXT",
1737        exp.DType.UINT: "UINTEGER",
1738        exp.DType.VARBINARY: "BLOB",
1739        exp.DType.ROWVERSION: "BLOB",
1740        exp.DType.VARCHAR: "TEXT",
1741        exp.DType.TIMESTAMPLTZ: "TIMESTAMPTZ",
1742        exp.DType.TIMESTAMPNTZ: "TIMESTAMP",
1743        exp.DType.TIMESTAMP_S: "TIMESTAMP_S",
1744        exp.DType.TIMESTAMP_MS: "TIMESTAMP_MS",
1745        exp.DType.TIMESTAMP_NS: "TIMESTAMP_NS",
1746        exp.DType.BIGDECIMAL: "DECIMAL",
1747    }
1748
1749    TYPE_PARAM_SETTINGS = {
1750        **generator.Generator.TYPE_PARAM_SETTINGS,
1751        exp.DType.BIGDECIMAL: ((38, 5), (38, 38)),
1752        exp.DType.DECFLOAT: ((38, 5), (38, 38)),
1753    }
1754
1755    # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
1756    RESERVED_KEYWORDS = {
1757        "array",
1758        "analyse",
1759        "union",
1760        "all",
1761        "when",
1762        "in_p",
1763        "default",
1764        "create_p",
1765        "window",
1766        "asymmetric",
1767        "to",
1768        "else",
1769        "localtime",
1770        "from",
1771        "end_p",
1772        "select",
1773        "current_date",
1774        "foreign",
1775        "with",
1776        "grant",
1777        "session_user",
1778        "or",
1779        "except",
1780        "references",
1781        "fetch",
1782        "limit",
1783        "group_p",
1784        "leading",
1785        "into",
1786        "collate",
1787        "offset",
1788        "do",
1789        "then",
1790        "localtimestamp",
1791        "check_p",
1792        "lateral_p",
1793        "current_role",
1794        "where",
1795        "asc_p",
1796        "placing",
1797        "desc_p",
1798        "user",
1799        "unique",
1800        "initially",
1801        "column",
1802        "both",
1803        "some",
1804        "as",
1805        "any",
1806        "only",
1807        "deferrable",
1808        "null_p",
1809        "current_time",
1810        "true_p",
1811        "table",
1812        "case",
1813        "trailing",
1814        "variadic",
1815        "for",
1816        "on",
1817        "distinct",
1818        "false_p",
1819        "not",
1820        "constraint",
1821        "current_timestamp",
1822        "returning",
1823        "primary",
1824        "intersect",
1825        "having",
1826        "analyze",
1827        "current_user",
1828        "and",
1829        "cast",
1830        "symmetric",
1831        "using",
1832        "order",
1833        "current_catalog",
1834    }
1835
1836    UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)
1837
1838    # DuckDB doesn't generally support CREATE TABLE .. properties
1839    # https://duckdb.org/docs/sql/statements/create_table.html
1840    # There are a few exceptions (e.g. temporary tables) which are supported or
1841    # can be transpiled to DuckDB, so we explicitly override them accordingly
1842    PROPERTIES_LOCATION = {
1843        **{
1844            prop: exp.Properties.Location.UNSUPPORTED
1845            for prop in generator.Generator.PROPERTIES_LOCATION
1846        },
1847        exp.LikeProperty: exp.Properties.Location.POST_SCHEMA,
1848        exp.TemporaryProperty: exp.Properties.Location.POST_CREATE,
1849        exp.ReturnsProperty: exp.Properties.Location.POST_ALIAS,
1850        exp.SequenceProperties: exp.Properties.Location.POST_EXPRESSION,
1851        exp.IcebergProperty: exp.Properties.Location.POST_CREATE,
1852    }
1853
1854    IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS: t.ClassVar = _IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS
1855
1856    # Template for ZIPF transpilation - placeholders get replaced with actual parameters
1857    ZIPF_TEMPLATE: exp.Expr = exp.maybe_parse(
1858        """
1859        WITH rand AS (SELECT :random_expr AS r),
1860        weights AS (
1861            SELECT i, 1.0 / POWER(i, :s) AS w
1862            FROM RANGE(1, :n + 1) AS t(i)
1863        ),
1864        cdf AS (
1865            SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p
1866            FROM weights
1867        )
1868        SELECT MIN(i)
1869        FROM cdf
1870        WHERE p >= (SELECT r FROM rand)
1871        """
1872    )
1873
1874    # Template for NORMAL transpilation using Box-Muller transform
1875    # mean + (stddev * sqrt(-2 * ln(u1)) * cos(2 * pi * u2))
1876    NORMAL_TEMPLATE: exp.Expr = exp.maybe_parse(
1877        ":mean + (:stddev * SQRT(-2 * LN(GREATEST(:u1, 1e-10))) * COS(2 * PI() * :u2))"
1878    )
1879
1880    # Template for generating a seeded pseudo-random value in [0, 1) from a hash
1881    SEEDED_RANDOM_TEMPLATE: exp.Expr = exp.maybe_parse("(ABS(HASH(:seed)) % 1000000) / 1000000.0")
1882
1883    # Template for generating signed and unsigned SEQ values within a specified range
1884    SEQ_UNSIGNED: exp.Expr = _SEQ_UNSIGNED
1885    SEQ_SIGNED: exp.Expr = _SEQ_SIGNED
1886
1887    # Template for MAP_CAT transpilation - Snowflake semantics:
1888    # 1. Returns NULL if either input is NULL
1889    # 2. For duplicate keys, prefers non-NULL value (COALESCE(m2[k], m1[k]))
1890    # 3. Filters out entries with NULL values from the result
1891    MAPCAT_TEMPLATE: exp.Expr = exp.maybe_parse(
1892        """
1893        CASE
1894            WHEN :map1 IS NULL OR :map2 IS NULL THEN NULL
1895            ELSE MAP_FROM_ENTRIES(LIST_FILTER(LIST_TRANSFORM(
1896                LIST_DISTINCT(LIST_CONCAT(MAP_KEYS(:map1), MAP_KEYS(:map2))),
1897                __k -> STRUCT_PACK(key := __k, value := COALESCE(:map2[__k], :map1[__k]))
1898            ), __x -> __x.value IS NOT NULL))
1899        END
1900        """
1901    )
1902
1903    # Mappings for EXTRACT/DATE_PART transpilation
1904    # Maps Snowflake specifiers unsupported in DuckDB to strftime format codes
1905    EXTRACT_STRFTIME_MAPPINGS: dict[str, tuple[str, str]] = {
1906        "WEEKISO": ("%V", "INTEGER"),
1907        "YEAROFWEEK": ("%G", "INTEGER"),
1908        "YEAROFWEEKISO": ("%G", "INTEGER"),
1909        "NANOSECOND": ("%n", "BIGINT"),
1910    }
1911
1912    # Maps epoch-based specifiers to DuckDB epoch functions
1913    EXTRACT_EPOCH_MAPPINGS: dict[str, str] = {
1914        "EPOCH_SECOND": "EPOCH",
1915        "EPOCH_MILLISECOND": "EPOCH_MS",
1916        "EPOCH_MICROSECOND": "EPOCH_US",
1917        "EPOCH_NANOSECOND": "EPOCH_NS",
1918    }
1919
1920    # Template for BITMAP_CONSTRUCT_AGG transpilation
1921    #
1922    # BACKGROUND:
1923    # Snowflake's BITMAP_CONSTRUCT_AGG aggregates integers into a compact binary bitmap.
1924    # Supports values in range 0-32767, this version returns NULL if any value is out of range
1925    # See: https://docs.snowflake.com/en/sql-reference/functions/bitmap_construct_agg
1926    # See: https://docs.snowflake.com/en/user-guide/querying-bitmaps-for-distinct-counts
1927    #
1928    # Snowflake uses two different formats based on the number of unique values:
1929    #
1930    # Format 1 - Small bitmap (< 5 unique values): Length of 10 bytes
1931    #   Bytes 0-1: Count of values as 2-byte big-endian integer (e.g., 3 values = 0x0003)
1932    #   Bytes 2-9: Up to 4 values, each as 2-byte little-endian integers, zero-padded to 8 bytes
1933    #   Example: Values [1, 2, 3] -> 0x0003 0100 0200 0300 0000 (hex)
1934    #                                count  v1   v2   v3   pad
1935    #
1936    # Format 2 - Large bitmap (>= 5 unique values): Length of 10 + (2 * count) bytes
1937    #   Bytes 0-9: Fixed header 0x08 followed by 9 zero bytes
1938    #   Bytes 10+: Each value as 2-byte little-endian integer (no padding)
1939    #   Example: Values [1,2,3,4,5] -> 0x08 00000000 00000000 00 0100 0200 0300 0400 0500
1940    #                                  hdr  ----9 zero bytes----  v1   v2   v3   v4   v5
1941    #
1942    # TEMPLATE STRUCTURE
1943    #
1944    # Phase 1 - Innermost subquery: Data preparation
1945    #   SELECT LIST_SORT(...) AS l
1946    #   - Aggregates all input values into a list, remove NULLs, duplicates and sorts
1947    #   Result: Clean, sorted list of unique non-null integers stored as 'l'
1948    #
1949    # Phase 2 - Middle subquery: Hex string construction
1950    #   LIST_TRANSFORM(...)
1951    #   - Converts each integer to 2-byte little-endian hex representation
1952    #   - & 255 extracts low byte, >> 8 extracts high byte
1953    #   - LIST_REDUCE: Concatenates all hex pairs into single string 'h'
1954    #   Result: Hex string of all values
1955    #
1956    # Phase 3 - Outer SELECT: Final bitmap assembly
1957    #   LENGTH(l) < 5:
1958    #   - Small format: 2-byte count (big-endian via %04X) + values + zero padding
1959    #   LENGTH(l) >= 5:
1960    #   - Large format: Fixed 10-byte header + values (no padding needed)
1961    #   Result: Complete binary bitmap as BLOB
1962    #
1963    BITMAP_CONSTRUCT_AGG_TEMPLATE: exp.Expr = exp.maybe_parse(
1964        """
1965        SELECT CASE
1966            WHEN l IS NULL OR LENGTH(l) = 0 THEN NULL
1967            WHEN LENGTH(l) != LENGTH(LIST_FILTER(l, __v -> __v BETWEEN 0 AND 32767)) THEN NULL
1968            WHEN LENGTH(l) < 5 THEN UNHEX(PRINTF('%04X', LENGTH(l)) || h || REPEAT('00', GREATEST(0, 4 - LENGTH(l)) * 2))
1969            ELSE UNHEX('08000000000000000000' || h)
1970        END
1971        FROM (
1972            SELECT l, COALESCE(LIST_REDUCE(
1973                LIST_TRANSFORM(l, __x -> PRINTF('%02X%02X', CAST(__x AS INT) & 255, (CAST(__x AS INT) >> 8) & 255)),
1974                (__a, __b) -> __a || __b, ''
1975            ), '') AS h
1976            FROM (SELECT LIST_SORT(LIST_DISTINCT(LIST(:arg) FILTER(NOT :arg IS NULL))) AS l)
1977        )
1978        """
1979    )
1980
1981    # Template for RANDSTR transpilation - placeholders get replaced with actual parameters
1982    RANDSTR_TEMPLATE: exp.Expr = exp.maybe_parse(
1983        f"""
1984        SELECT LISTAGG(
1985            SUBSTRING(
1986                '{RANDSTR_CHAR_POOL}',
1987                1 + CAST(FLOOR(random_value * 62) AS INT),
1988                1
1989            ),
1990            ''
1991        )
1992        FROM (
1993            SELECT (ABS(HASH(i + :seed)) % 1000) / 1000.0 AS random_value
1994            FROM RANGE(:length) AS t(i)
1995        )
1996        """,
1997    )
1998
    # Template for MINHASH transpilation
    # Computes k minimum hash values across aggregated data using DuckDB list functions
    # Returns JSON matching Snowflake format: {"state": [...], "type": "minhash", "version": 1}
    MINHASH_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT JSON_OBJECT('state', LIST(min_h ORDER BY seed), 'type', 'minhash', 'version', 1)
        FROM (
            SELECT seed, LIST_MIN(LIST_TRANSFORM(vals, __v -> HASH(CAST(__v AS VARCHAR) || CAST(seed AS VARCHAR)))) AS min_h
            FROM (SELECT LIST(:expr) AS vals), RANGE(0, :k) AS t(seed)
        )
        """,
    )

    # Template for MINHASH_COMBINE transpilation
    # Combines multiple minhash signatures by taking element-wise minimum
    # (UNNEST ... WITH ORDINALITY pairs each state value with its 1-based position)
    MINHASH_COMBINE_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT JSON_OBJECT('state', LIST(min_h ORDER BY idx), 'type', 'minhash', 'version', 1)
        FROM (
            SELECT
                pos AS idx,
                MIN(val) AS min_h
            FROM
                UNNEST(LIST(:expr)) AS _(sig),
                UNNEST(CAST(sig -> 'state' AS UBIGINT[])) WITH ORDINALITY AS t(val, pos)
            GROUP BY pos
        )
        """,
    )

    # Template for APPROXIMATE_SIMILARITY transpilation
    # Computes multi-way Jaccard similarity: fraction of positions where ALL signatures agree
    # (a position "agrees" when COUNT(DISTINCT h) = 1 across all unnested signatures)
    APPROXIMATE_SIMILARITY_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        SELECT CAST(SUM(CASE WHEN num_distinct = 1 THEN 1 ELSE 0 END) AS DOUBLE) / COUNT(*)
        FROM (
            SELECT pos, COUNT(DISTINCT h) AS num_distinct
            FROM (
                SELECT h, pos
                FROM UNNEST(LIST(:expr)) AS _(sig),
                     UNNEST(CAST(sig -> 'state' AS UBIGINT[])) WITH ORDINALITY AS s(h, pos)
            )
            GROUP BY pos
        )
        """,
    )
2045
    # Template for ARRAYS_ZIP transpilation
    # Snowflake pads to longest array; DuckDB LIST_ZIP truncates to shortest
    # Uses RANGE + indexing to match Snowflake behavior
    ARRAYS_ZIP_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE WHEN :null_check THEN NULL
        WHEN :all_empty_check THEN [:empty_struct]
        ELSE LIST_TRANSFORM(RANGE(0, :max_len), __i -> :transform_struct)
        END
        """,
    )

    # Template for UUID_V5 (name-based UUID) transpilation.
    # Hashes UNHEX(namespace) || ENCODE(name) with SHA1, keeps the first 32 hex
    # chars, and formats them as 8-4-4-4-12: the version nibble is forced to '5'
    # and the high bits of the variant byte to 10 via `& 63 | 128`.
    UUID_V5_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        (SELECT
            LOWER(
                SUBSTR(h, 1, 8) || '-' ||
                SUBSTR(h, 9, 4) || '-' ||
                '5' || SUBSTR(h, 14, 3) || '-' ||
                FORMAT('{:02x}', CAST('0x' || SUBSTR(h, 17, 2) AS INT) & 63 | 128) || SUBSTR(h, 19, 2) || '-' ||
                SUBSTR(h, 21, 12)
            )
        FROM (
            SELECT SUBSTR(SHA1(UNHEX(REPLACE(:namespace, '-', '')) || ENCODE(:name, 'utf8')), 1, 32) AS h
        ))
        """
    )
2073
    # Shared bag semantics outer frame for ARRAY_EXCEPT and ARRAY_INTERSECTION.
    # Each element is paired with its 1-based position via LIST_ZIP, then filtered
    # by a comparison operator (supplied via :cond) that determines the operation:
    #   EXCEPT (>):        keep the N-th occurrence only if N > count in arr2
    #                      e.g. [2,2,2] EXCEPT [2,2] -> [2]
    #   INTERSECTION (<=): keep the N-th occurrence only if N <= count in arr2
    #                      e.g. [2,2,2] INTERSECT [2,2] -> [2,2]
    # IS NOT DISTINCT FROM is used for NULL-safe element comparison.
    ARRAY_BAG_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :arr1 IS NULL OR :arr2 IS NULL THEN NULL
            ELSE LIST_TRANSFORM(
                LIST_FILTER(
                    LIST_ZIP(:arr1, GENERATE_SERIES(1, LEN(:arr1))),
                    pair -> :cond
                ),
                pair -> pair[0]
            )
        END
        """
    )

    # Predicate for ARRAY_BAG_TEMPLATE's :cond implementing EXCEPT semantics:
    # the element's occurrence count within arr1 up to the current position must
    # exceed its total count in arr2.
    ARRAY_EXCEPT_CONDITION: exp.Expr = exp.maybe_parse(
        "LEN(LIST_FILTER(:arr1[1:pair[1]], e -> e IS NOT DISTINCT FROM pair[0]))"
        " > LEN(LIST_FILTER(:arr2, e -> e IS NOT DISTINCT FROM pair[0]))"
    )

    # Predicate for ARRAY_BAG_TEMPLATE's :cond implementing INTERSECTION
    # semantics: the element's occurrence count within arr1 up to the current
    # position must not exceed its total count in arr2.
    ARRAY_INTERSECTION_CONDITION: exp.Expr = exp.maybe_parse(
        "LEN(LIST_FILTER(:arr1[1:pair[1]], e -> e IS NOT DISTINCT FROM pair[0]))"
        " <= LEN(LIST_FILTER(:arr2, e -> e IS NOT DISTINCT FROM pair[0]))"
    )
2106
    # Set semantics for ARRAY_EXCEPT. Deduplicates arr1 via LIST_DISTINCT, then
    # filters out any element that appears at least once in arr2.
    #   e.g. [1,1,2,3] EXCEPT [1] -> [2,3]
    # IS NOT DISTINCT FROM is used for NULL-safe element comparison.
    ARRAY_EXCEPT_SET_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :arr1 IS NULL OR :arr2 IS NULL THEN NULL
            ELSE LIST_FILTER(
                LIST_DISTINCT(:arr1),
                e -> LEN(LIST_FILTER(:arr2, x -> x IS NOT DISTINCT FROM e)) = 0
            )
        END
        """
    )

    # Template for STRTOK_TO_ARRAY transpilation.
    # :escaped is the delimiter with regex metacharacters escaped; it is wrapped
    # in '[' ... ']' so each delimiter character splits independently. An empty
    # delimiter maps to the never-matching pattern '.^' (whole input is one
    # token), a NULL delimiter yields NULL, and empty tokens are dropped.
    STRTOK_TO_ARRAY_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE WHEN :delimiter IS NULL THEN NULL
        ELSE LIST_FILTER(
            REGEXP_SPLIT_TO_ARRAY(:string, CASE WHEN :delimiter = '' THEN '.^' ELSE CONCAT('[', :escaped, ']') END),
            x -> NOT x = ''
        ) END
        """
    )
2132
    # Template for STRTOK function transpilation
    #
    # DuckDB itself doesn't have a strtok function. This handles the transpilation from Snowflake to DuckDB.
    # We may need to adjust this if we want to support transpilation from other dialects
    #
    # The :base_func placeholder is filled by the caller with the token-extraction
    # expression sketched in the ELSE branch of the pseudo-SQL below.
    #
    # CASE
    #     -- Snowflake: empty delimiter + empty input string -> NULL
    #     WHEN delimiter = '' AND input_str = '' THEN NULL
    #
    #     -- Snowflake: empty delimiter + non-empty input string -> treats whole input as 1 token -> return input string if index is 1
    #     WHEN delimiter = '' AND index = 1 THEN input_str
    #
    #     -- Snowflake: empty delimiter + non-empty input string -> treats whole input as 1 token -> return NULL if index is not 1
    #     WHEN delimiter = '' THEN NULL
    #
    #     -- Snowflake: negative indices return NULL
    #     WHEN index < 0 THEN NULL
    #
    #     -- Snowflake: return NULL if any argument is NULL
    #     WHEN input_str IS NULL OR delimiter IS NULL OR index IS NULL THEN NULL
    #
    #
    #     ELSE LIST_FILTER(
    #         REGEXP_SPLIT_TO_ARRAY(
    #             input_str,
    #             CASE
    #                 -- if delimiter is '', we don't want to surround it with '[' and ']' as '[]' is invalid for DuckDB
    #                 WHEN delimiter = '' THEN ''
    #
    #                 -- handle problematic regex characters in delimiter with REGEXP_REPLACE
    #                 -- turn delimiter into a regex char set, otherwise DuckDB will match in order, which we don't want
    #                 ELSE '[' || REGEXP_REPLACE(delimiter, problematic_char_set, '\\\1', 'g') || ']'
    #             END
    #         ),
    #
    #         -- Snowflake: don't return empty strings
    #         x -> NOT x = ''
    #     )[index]
    # END
    STRTOK_TEMPLATE: exp.Expr = exp.maybe_parse(
        """
        CASE
            WHEN :delimiter = '' AND :string = '' THEN NULL
            WHEN :delimiter = '' AND :part_index = 1 THEN :string
            WHEN :delimiter = '' THEN NULL
            WHEN :part_index < 0 THEN NULL
            WHEN :string IS NULL OR :delimiter IS NULL OR :part_index IS NULL THEN NULL
            ELSE :base_func
        END
        """
    )
2184
2185    def _array_bag_sql(self, condition: exp.Expr, arr1: exp.Expr, arr2: exp.Expr) -> str:
2186        cond = exp.Paren(this=exp.replace_placeholders(condition, arr1=arr1, arr2=arr2))
2187        return self.sql(
2188            exp.replace_placeholders(self.ARRAY_BAG_TEMPLATE, arr1=arr1, arr2=arr2, cond=cond)
2189        )
2190
2191    def timeslice_sql(self, expression: exp.TimeSlice) -> str:
2192        """
2193        Transform Snowflake's TIME_SLICE to DuckDB's time_bucket.
2194
2195        Snowflake: TIME_SLICE(date_expr, slice_length, 'UNIT' [, 'START'|'END'])
2196        DuckDB:    time_bucket(INTERVAL 'slice_length' UNIT, date_expr)
2197
2198        For 'END' kind, add the interval to get the end of the slice.
2199        For DATE type with 'END', cast result back to DATE to preserve type.
2200        """
2201        date_expr = expression.this
2202        slice_length = expression.expression
2203        unit = expression.unit
2204        kind = expression.text("kind").upper()
2205
2206        # Create INTERVAL expression: INTERVAL 'N' UNIT
2207        interval_expr = exp.Interval(this=slice_length, unit=unit)
2208
2209        # Create base time_bucket expression
2210        time_bucket_expr = exp.func("time_bucket", interval_expr, date_expr)
2211
2212        # Check if we need the end of the slice (default is start)
2213        if not kind == "END":
2214            # For 'START', return time_bucket directly
2215            return self.sql(time_bucket_expr)
2216
2217        # For 'END', add the interval to get end of slice
2218        add_expr = exp.Add(this=time_bucket_expr, expression=interval_expr.copy())
2219
2220        # If input is DATE type, cast result back to DATE to preserve type
2221        # DuckDB converts DATE to TIMESTAMP when adding intervals
2222        if date_expr.is_type(exp.DType.DATE):
2223            return self.sql(exp.cast(add_expr, exp.DType.DATE))
2224
2225        return self.sql(add_expr)
2226
2227    def bitmapbucketnumber_sql(self, expression: exp.BitmapBucketNumber) -> str:
2228        """
2229        Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.
2230
2231        Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
2232        - Each bucket covers 32,768 values
2233        - Bucket numbering starts at 1
2234        - Formula: ((value - 1) // 32768) + 1 for positive values
2235
2236        For non-positive values (0 and negative), we use value // 32768 to avoid
2237        producing bucket 0 or positive bucket IDs for negative inputs.
2238        """
2239        value = expression.this
2240
2241        positive_formula = ((value - 1) // 32768) + 1
2242        non_positive_formula = value // 32768
2243
2244        # CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
2245        case_expr = (
2246            exp.case()
2247            .when(exp.GT(this=value, expression=exp.Literal.number(0)), positive_formula)
2248            .else_(non_positive_formula)
2249        )
2250        return self.sql(case_expr)
2251
2252    def bitmapbitposition_sql(self, expression: exp.BitmapBitPosition) -> str:
2253        """
2254        Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.
2255
2256        Snowflake's BITMAP_BIT_POSITION behavior:
2257        - For n <= 0: returns ABS(n) % 32768
2258        - For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
2259        """
2260        this = expression.this
2261
2262        return self.sql(
2263            exp.Mod(
2264                this=exp.Paren(
2265                    this=exp.If(
2266                        this=exp.GT(this=this, expression=exp.Literal.number(0)),
2267                        true=this - exp.Literal.number(1),
2268                        false=exp.Abs(this=this),
2269                    )
2270                ),
2271                expression=MAX_BIT_POSITION,
2272            )
2273        )
2274
2275    def bitmapconstructagg_sql(self, expression: exp.BitmapConstructAgg) -> str:
2276        """
2277        Transpile Snowflake's BITMAP_CONSTRUCT_AGG to DuckDB equivalent.
2278        Uses a pre-parsed template with placeholders replaced by expression nodes.
2279
2280        Snowflake bitmap format:
2281        - Small (< 5 unique values): 2-byte count (big-endian) + values (little-endian) + padding to 10 bytes
2282        - Large (>= 5 unique values): 10-byte header (0x08 + 9 zeros) + values (little-endian)
2283        """
2284        arg = expression.this
2285        return (
2286            f"({self.sql(exp.replace_placeholders(self.BITMAP_CONSTRUCT_AGG_TEMPLATE, arg=arg))})"
2287        )
2288
    def compress_sql(self, expression: exp.Compress) -> str:
        """Warn that DuckDB has no COMPRESS, then emit a generic function call."""
        self.unsupported("DuckDB does not support the COMPRESS() function")
        return self.function_fallback_sql(expression)

    def encrypt_sql(self, expression: exp.Encrypt) -> str:
        """Warn that DuckDB has no ENCRYPT, then emit a generic function call."""
        self.unsupported("ENCRYPT is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def decrypt_sql(self, expression: exp.Decrypt) -> str:
        """Warn that DuckDB has no DECRYPT/TRY_DECRYPT, then emit a generic function call."""
        # The warning names the safe (TRY_) variant when the `safe` flag is set.
        func_name = "TRY_DECRYPT" if expression.args.get("safe") else "DECRYPT"
        self.unsupported(f"{func_name} is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def decryptraw_sql(self, expression: exp.DecryptRaw) -> str:
        """Warn that DuckDB has no DECRYPT_RAW/TRY_DECRYPT_RAW, then emit a generic function call."""
        func_name = "TRY_DECRYPT_RAW" if expression.args.get("safe") else "DECRYPT_RAW"
        self.unsupported(f"{func_name} is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def encryptraw_sql(self, expression: exp.EncryptRaw) -> str:
        """Warn that DuckDB has no ENCRYPT_RAW, then emit a generic function call."""
        self.unsupported("ENCRYPT_RAW is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def parseurl_sql(self, expression: exp.ParseUrl) -> str:
        """Warn that DuckDB has no PARSE_URL, then emit a generic function call."""
        self.unsupported("PARSE_URL is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def parseip_sql(self, expression: exp.ParseIp) -> str:
        """Warn that DuckDB has no PARSE_IP, then emit a generic function call."""
        self.unsupported("PARSE_IP is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def decompressstring_sql(self, expression: exp.DecompressString) -> str:
        """Warn that DuckDB has no DECOMPRESS_STRING, then emit a generic function call."""
        self.unsupported("DECOMPRESS_STRING is not supported in DuckDB")
        return self.function_fallback_sql(expression)

    def decompressbinary_sql(self, expression: exp.DecompressBinary) -> str:
        """Warn that DuckDB has no DECOMPRESS_BINARY, then emit a generic function call."""
        self.unsupported("DECOMPRESS_BINARY is not supported in DuckDB")
        return self.function_fallback_sql(expression)
2326
2327    def jarowinklersimilarity_sql(self, expression: exp.JarowinklerSimilarity) -> str:
2328        this = expression.this
2329        expr = expression.expression
2330
2331        if expression.args.get("case_insensitive"):
2332            this = exp.Upper(this=this)
2333            expr = exp.Upper(this=expr)
2334
2335        result = exp.func("JARO_WINKLER_SIMILARITY", this, expr)
2336
2337        if expression.args.get("integer_scale"):
2338            result = exp.cast(result * 100, "INTEGER")
2339
2340        return self.sql(result)
2341
2342    def nthvalue_sql(self, expression: exp.NthValue) -> str:
2343        from_first = expression.args.get("from_first", True)
2344        if not from_first:
2345            self.unsupported("DuckDB's NTH_VALUE doesn't support starting from the end ")
2346
2347        return self.function_fallback_sql(expression)
2348
2349    def randstr_sql(self, expression: exp.Randstr) -> str:
2350        """
2351        Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
2352        Uses a pre-parsed template with placeholders replaced by expression nodes.
2353
2354        RANDSTR(length, generator) generates a random string of specified length.
2355        - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
2356        - With RANDOM(): Use RANDOM() in the hash for non-deterministic output
2357        - No generator: Use default seed value
2358        """
2359        length = expression.this
2360        generator = expression.args.get("generator")
2361
2362        if generator:
2363            if isinstance(generator, exp.Rand):
2364                # If it's RANDOM(), use its seed if available, otherwise use RANDOM() itself
2365                seed_value = generator.this or generator
2366            else:
2367                # Const/int or other expression - use as seed directly
2368                seed_value = generator
2369        else:
2370            # No generator specified, use default seed (arbitrary but deterministic)
2371            seed_value = exp.Literal.number(RANDSTR_SEED)
2372
2373        replacements = {"seed": seed_value, "length": length}
2374        return f"({self.sql(exp.replace_placeholders(self.RANDSTR_TEMPLATE, **replacements))})"
2375
2376    @unsupported_args("finish")
2377    def reduce_sql(self, expression: exp.Reduce) -> str:
2378        array_arg = expression.this
2379        initial_value = expression.args.get("initial")
2380        merge_lambda = expression.args.get("merge")
2381
2382        if merge_lambda:
2383            merge_lambda.set("colon", True)
2384
2385        return self.func("list_reduce", array_arg, merge_lambda, initial_value)
2386
2387    def zipf_sql(self, expression: exp.Zipf) -> str:
2388        """
2389        Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling.
2390        Uses a pre-parsed template with placeholders replaced by expression nodes.
2391        """
2392        s = expression.this
2393        n = expression.args["elementcount"]
2394        gen = expression.args["gen"]
2395
2396        if not isinstance(gen, exp.Rand):
2397            # (ABS(HASH(seed)) % 1000000) / 1000000.0
2398            random_expr: exp.Expr = exp.Div(
2399                this=exp.Paren(
2400                    this=exp.Mod(
2401                        this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()])),
2402                        expression=exp.Literal.number(1000000),
2403                    )
2404                ),
2405                expression=exp.Literal.number(1000000.0),
2406            )
2407        else:
2408            # Use RANDOM() for non-deterministic output
2409            random_expr = exp.Rand()
2410
2411        replacements = {"s": s, "n": n, "random_expr": random_expr}
2412        return f"({self.sql(exp.replace_placeholders(self.ZIPF_TEMPLATE, **replacements))})"
2413
2414    def tobinary_sql(self, expression: exp.ToBinary) -> str:
2415        """
2416        TO_BINARY and TRY_TO_BINARY transpilation:
2417        - 'HEX': TO_BINARY('48454C50', 'HEX') -> UNHEX('48454C50')
2418        - 'UTF-8': TO_BINARY('TEST', 'UTF-8') -> ENCODE('TEST')
2419        - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') -> FROM_BASE64('SEVMUA==')
2420
2421        For TRY_TO_BINARY (safe=True), wrap with TRY():
2422        - 'HEX': TRY_TO_BINARY('invalid', 'HEX') -> TRY(UNHEX('invalid'))
2423        """
2424        value = expression.this
2425        format_arg = expression.args.get("format")
2426        is_safe = expression.args.get("safe")
2427        is_binary = _is_binary(expression)
2428
2429        if not format_arg and not is_binary:
2430            func_name = "TRY_TO_BINARY" if is_safe else "TO_BINARY"
2431            return self.func(func_name, value)
2432
2433        # Snowflake defaults to HEX encoding when no format is specified
2434        fmt = format_arg.name.upper() if format_arg else "HEX"
2435
2436        if fmt in ("UTF-8", "UTF8"):
2437            # DuckDB ENCODE always uses UTF-8, no charset parameter needed
2438            result = self.func("ENCODE", value)
2439        elif fmt == "BASE64":
2440            result = self.func("FROM_BASE64", value)
2441        elif fmt == "HEX":
2442            result = self.func("UNHEX", value)
2443        else:
2444            if is_safe:
2445                return self.sql(exp.null())
2446            else:
2447                self.unsupported(f"format {fmt} is not supported")
2448                result = self.func("TO_BINARY", value)
2449        return f"TRY({result})" if is_safe else result
2450
2451    def tonumber_sql(self, expression: exp.ToNumber) -> str:
2452        fmt = expression.args.get("format")
2453        precision = expression.args.get("precision")
2454        scale = expression.args.get("scale")
2455
2456        if not fmt and precision and scale:
2457            return self.sql(
2458                exp.cast(
2459                    expression.this, f"DECIMAL({precision.name}, {scale.name})", dialect="duckdb"
2460                )
2461            )
2462
2463        return super().tonumber_sql(expression)
2464
2465    def _greatest_least_sql(self, expression: exp.Greatest | exp.Least) -> str:
2466        """
2467        Handle GREATEST/LEAST functions with dialect-aware NULL behavior.
2468
2469        - If ignore_nulls=False (BigQuery-style): return NULL if any argument is NULL
2470        - If ignore_nulls=True (DuckDB/PostgreSQL-style): ignore NULLs, return greatest/least non-NULL value
2471        """
2472        # Get all arguments
2473        all_args = [expression.this, *expression.expressions]
2474        fallback_sql = self.function_fallback_sql(expression)
2475
2476        if expression.args.get("ignore_nulls"):
2477            # DuckDB/PostgreSQL behavior: use native GREATEST/LEAST (ignores NULLs)
2478            return self.sql(fallback_sql)
2479
2480        # return NULL if any argument is NULL
2481        case_expr = exp.case().when(
2482            exp.or_(*[arg.is_(exp.null()) for arg in all_args], copy=False),
2483            exp.null(),
2484            copy=False,
2485        )
2486        case_expr.set("default", fallback_sql)
2487        return self.sql(case_expr)
2488
2489    def generator_sql(self, expression: exp.Generator) -> str:
2490        # Transpile Snowflake GENERATOR to DuckDB range()
2491        rowcount = expression.args.get("rowcount")
2492        time_limit = expression.args.get("time_limit")
2493
2494        if time_limit:
2495            self.unsupported("GENERATOR TIMELIMIT parameter is not supported in DuckDB")
2496
2497        if not rowcount:
2498            self.unsupported("GENERATOR without ROWCOUNT is not supported in DuckDB")
2499            return self.func("range", exp.Literal.number(0))
2500
2501        return self.func("range", rowcount)
2502
    def greatest_sql(self, expression: exp.Greatest) -> str:
        # Delegate to the shared GREATEST/LEAST handler (NULL-semantics aware).
        return self._greatest_least_sql(expression)

    def least_sql(self, expression: exp.Least) -> str:
        # Delegate to the shared GREATEST/LEAST handler (NULL-semantics aware).
        return self._greatest_least_sql(expression)
2508
2509    def lambda_sql(self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True) -> str:
2510        if expression.args.get("colon"):
2511            prefix = "LAMBDA "
2512            arrow_sep = ":"
2513            wrap = False
2514        else:
2515            prefix = ""
2516
2517        lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
2518        return f"{prefix}{lambda_sql}"
2519
2520    def show_sql(self, expression: exp.Show) -> str:
2521        from_ = self.sql(expression, "from_")
2522        from_ = f" FROM {from_}" if from_ else ""
2523        return f"SHOW {expression.name}{from_}"
2524
    def soundex_sql(self, expression: exp.Soundex) -> str:
        # DuckDB has no SOUNDEX; warn but still emit the call unchanged.
        self.unsupported("SOUNDEX is not supported in DuckDB")
        return self.func("SOUNDEX", expression.this)
2528
2529    def sortarray_sql(self, expression: exp.SortArray) -> str:
2530        arr = expression.this
2531        asc = expression.args.get("asc")
2532        nulls_first = expression.args.get("nulls_first")
2533
2534        if not isinstance(asc, exp.Boolean) and not isinstance(nulls_first, exp.Boolean):
2535            return self.func("LIST_SORT", arr, asc, nulls_first)
2536
2537        nulls_are_first = nulls_first == exp.true()
2538        nulls_first_sql = exp.Literal.string("NULLS FIRST") if nulls_are_first else None
2539
2540        if not isinstance(asc, exp.Boolean):
2541            return self.func("LIST_SORT", arr, asc, nulls_first_sql)
2542
2543        descending = asc == exp.false()
2544
2545        if not descending and not nulls_are_first:
2546            return self.func("LIST_SORT", arr)
2547        if not nulls_are_first:
2548            return self.func("ARRAY_REVERSE_SORT", arr)
2549        return self.func(
2550            "LIST_SORT",
2551            arr,
2552            exp.Literal.string("DESC" if descending else "ASC"),
2553            exp.Literal.string("NULLS FIRST"),
2554        )
2555
2556    def install_sql(self, expression: exp.Install) -> str:
2557        force = "FORCE " if expression.args.get("force") else ""
2558        this = self.sql(expression, "this")
2559        from_clause = expression.args.get("from_")
2560        from_clause = f" FROM {from_clause}" if from_clause else ""
2561        return f"{force}INSTALL {this}{from_clause}"
2562
    def approxtopk_sql(self, expression: exp.ApproxTopK) -> str:
        # Warn that the return types are incompatible, then emit the call unchanged.
        self.unsupported(
            "APPROX_TOP_K cannot be transpiled to DuckDB due to incompatible return types. "
        )
        return self.function_fallback_sql(expression)
2568
2569    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
2570        return self.sql(exp.cast(expression.this, exp.DType.TIMESTAMPTZ))
2571
2572    def strposition_sql(self, expression: exp.StrPosition) -> str:
2573        this = expression.this
2574        substr = expression.args.get("substr")
2575        position = expression.args.get("position")
2576
2577        # For BINARY/BLOB: DuckDB's STRPOS doesn't support BLOB types
2578        # Convert to HEX strings, use STRPOS, then convert hex position to byte position
2579        if _is_binary(this):
2580            # Build expression: STRPOS(HEX(haystack), HEX(needle))
2581            hex_strpos = exp.StrPosition(
2582                this=exp.Hex(this=this),
2583                substr=exp.Hex(this=substr),
2584            )
2585
2586            return self.sql(exp.cast((hex_strpos + 1) / 2, exp.DType.INT))
2587
2588        # For VARCHAR: handle clamp_position
2589        if expression.args.get("clamp_position") and position:
2590            expression = expression.copy()
2591            expression.set(
2592                "position",
2593                exp.If(
2594                    this=exp.LTE(this=position, expression=exp.Literal.number(0)),
2595                    true=exp.Literal.number(1),
2596                    false=position.copy(),
2597                ),
2598            )
2599
2600        return strposition_sql(self, expression)
2601
2602    def substring_sql(self, expression: exp.Substring) -> str:
2603        if expression.args.get("zero_start"):
2604            start = expression.args.get("start")
2605            length = expression.args.get("length")
2606
2607            if start := expression.args.get("start"):
2608                start = exp.If(this=start.eq(0), true=exp.Literal.number(1), false=start)
2609            if length := expression.args.get("length"):
2610                length = exp.If(this=length < 0, true=exp.Literal.number(0), false=length)
2611
2612            return self.func("SUBSTRING", expression.this, start, length)
2613
2614        return self.function_fallback_sql(expression)
2615
2616    def strtotime_sql(self, expression: exp.StrToTime) -> str:
2617        # Check if target_type requires TIMESTAMPTZ (for LTZ/TZ variants)
2618        target_type = expression.args.get("target_type")
2619        needs_tz = target_type and target_type.this in (
2620            exp.DType.TIMESTAMPLTZ,
2621            exp.DType.TIMESTAMPTZ,
2622        )
2623
2624        if expression.args.get("safe"):
2625            formatted_time = self.format_time(expression)
2626            cast_type = exp.DType.TIMESTAMPTZ if needs_tz else exp.DType.TIMESTAMP
2627            return self.sql(
2628                exp.cast(self.func("TRY_STRPTIME", expression.this, formatted_time), cast_type)
2629            )
2630
2631        base_sql = str_to_time_sql(self, expression)
2632        if needs_tz:
2633            return self.sql(
2634                exp.cast(
2635                    base_sql,
2636                    exp.DataType(this=exp.DType.TIMESTAMPTZ),
2637                )
2638            )
2639        return base_sql
2640
2641    def strtodate_sql(self, expression: exp.StrToDate) -> str:
2642        formatted_time = self.format_time(expression)
2643        function_name = "STRPTIME" if not expression.args.get("safe") else "TRY_STRPTIME"
2644        return self.sql(
2645            exp.cast(
2646                self.func(function_name, expression.this, formatted_time),
2647                exp.DataType(this=exp.DType.DATE),
2648            )
2649        )
2650
2651    def tsordstotime_sql(self, expression: exp.TsOrDsToTime) -> str:
2652        this = expression.this
2653        time_format = self.format_time(expression)
2654        safe = expression.args.get("safe")
2655        time_type = exp.DataType.from_str("TIME", dialect="duckdb")
2656        cast_expr = exp.TryCast if safe else exp.Cast
2657
2658        if time_format:
2659            func_name = "TRY_STRPTIME" if safe else "STRPTIME"
2660            strptime = exp.Anonymous(this=func_name, expressions=[this, time_format])
2661            return self.sql(cast_expr(this=strptime, to=time_type))
2662
2663        if isinstance(this, exp.TsOrDsToTime) or this.is_type(exp.DType.TIME):
2664            return self.sql(this)
2665
2666        return self.sql(cast_expr(this=this, to=time_type))
2667
2668    def currentdate_sql(self, expression: exp.CurrentDate) -> str:
2669        if not expression.this:
2670            return "CURRENT_DATE"
2671
2672        expr = exp.Cast(
2673            this=exp.AtTimeZone(this=exp.CurrentTimestamp(), zone=expression.this),
2674            to=exp.DataType(this=exp.DType.DATE),
2675        )
2676        return self.sql(expr)
2677
2678    def checkjson_sql(self, expression: exp.CheckJson) -> str:
2679        arg = expression.this
2680        return self.sql(
2681            exp.case()
2682            .when(
2683                exp.or_(arg.is_(exp.Null()), arg.eq(""), exp.func("json_valid", arg)),
2684                exp.null(),
2685            )
2686            .else_(exp.Literal.string("Invalid JSON"))
2687        )
2688
2689    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
2690        arg = expression.this
2691        if expression.args.get("safe"):
2692            return self.sql(
2693                exp.case()
2694                .when(exp.func("json_valid", arg), exp.cast(arg.copy(), "JSON"))
2695                .else_(exp.null())
2696            )
2697        return self.func("JSON", arg)
2698
2699    def unicode_sql(self, expression: exp.Unicode) -> str:
2700        if expression.args.get("empty_is_zero"):
2701            return self.sql(
2702                exp.case()
2703                .when(expression.this.eq(exp.Literal.string("")), exp.Literal.number(0))
2704                .else_(exp.Anonymous(this="UNICODE", expressions=[expression.this]))
2705            )
2706
2707        return self.func("UNICODE", expression.this)
2708
2709    def stripnullvalue_sql(self, expression: exp.StripNullValue) -> str:
2710        return self.sql(
2711            exp.case()
2712            .when(exp.func("json_type", expression.this).eq("NULL"), exp.null())
2713            .else_(expression.this)
2714        )
2715
2716    def trunc_sql(self, expression: exp.Trunc) -> str:
2717        decimals = expression.args.get("decimals")
2718        if (
2719            expression.args.get("fractions_supported")
2720            and decimals
2721            and not decimals.is_type(exp.DType.INT)
2722        ):
2723            decimals = exp.cast(decimals, exp.DType.INT, dialect="duckdb")
2724
2725        return self.func("TRUNC", expression.this, decimals)
2726
    def normal_sql(self, expression: exp.Normal) -> str:
        """
        Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB.

        Uses the Box-Muller transform via NORMAL_TEMPLATE.

        Args:
            expression: NORMAL call with `this` (mean) and required `stddev`/`gen` args.

        Returns:
            DuckDB SQL string produced by filling in NORMAL_TEMPLATE.
        """
        mean = expression.this
        stddev = expression.args["stddev"]
        gen: exp.Expr = expression.args["gen"]

        # Build two uniform random values [0, 1) for Box-Muller transform
        if isinstance(gen, exp.Rand) and gen.this is None:
            # Unseeded RANDOM(): two independent draws suffice
            u1: exp.Expr = exp.Rand()
            u2: exp.Expr = exp.Rand()
        else:
            # Seeded: derive two values using HASH with different inputs
            # (seed and seed + 1) so u1/u2 are deterministic yet distinct
            seed = gen.this if isinstance(gen, exp.Rand) else gen
            u1 = exp.replace_placeholders(self.SEEDED_RANDOM_TEMPLATE, seed=seed)
            u2 = exp.replace_placeholders(
                self.SEEDED_RANDOM_TEMPLATE,
                seed=exp.Add(this=seed.copy(), expression=exp.Literal.number(1)),
            )

        replacements = {"mean": mean, "stddev": stddev, "u1": u1, "u2": u2}
        return self.sql(exp.replace_placeholders(self.NORMAL_TEMPLATE, **replacements))
2752
    def uniform_sql(self, expression: exp.Uniform) -> str:
        """
        Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB.

        UNIFORM returns a random value in [min, max]:
        - Integer result if both min and max are integers
        - Float result if either min or max is a float
        """
        min_val = expression.this
        max_val = expression.expression
        # NOTE(review): `gen` is fetched with .get() and may be None; the seeded
        # branch below would then build HASH with a missing argument — confirm the
        # parser always supplies `gen` for UNIFORM.
        gen = expression.args.get("gen")

        # Determine if result should be integer (both bounds are integers).
        # We do this to emulate Snowflake's behavior, INT -> INT, FLOAT -> FLOAT
        is_int_result = min_val.is_int and max_val.is_int

        # Build the random value expression [0, 1)
        if not isinstance(gen, exp.Rand):
            # Seed value: (ABS(HASH(seed)) % 1000000) / 1000000.0
            random_expr: exp.Expr = exp.Div(
                this=exp.Paren(
                    this=exp.Mod(
                        this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen])),
                        expression=exp.Literal.number(1000000),
                    )
                ),
                expression=exp.Literal.number(1000000.0),
            )
        else:
            random_expr = exp.Rand()

        # Build: min + random * (max - min [+ 1 for int])
        # The +1 widens the integer range so FLOOR can reach max inclusively
        range_expr: exp.Expr = exp.Sub(this=max_val, expression=min_val)
        if is_int_result:
            range_expr = exp.Add(this=range_expr, expression=exp.Literal.number(1))

        result: exp.Expr = exp.Add(
            this=min_val,
            expression=exp.Mul(this=random_expr, expression=exp.Paren(this=range_expr)),
        )

        # FLOOR + CAST yields an integer result matching Snowflake's INT semantics
        if is_int_result:
            result = exp.Cast(this=exp.Floor(this=result), to=exp.DType.BIGINT.into_expr())

        return self.sql(result)
2798
    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        """
        Transpile TIME_FROM_PARTS(hour, min, sec[, nano]) to DuckDB.

        Snowflake allows out-of-range components (overflow) that wrap around; when
        the `overflow` arg is set, the time is built via INTERVAL arithmetic on
        '00:00:00' instead of MAKE_TIME, unless all literal components are within
        normal ranges. Nanoseconds are folded into the seconds argument.
        """
        nano = expression.args.get("nano")
        overflow = expression.args.get("overflow")

        # Snowflake's TIME_FROM_PARTS supports overflow
        if overflow:
            hour = expression.args["hour"]
            minute = expression.args["min"]
            sec = expression.args["sec"]

            # Check if values are within normal ranges - use MAKE_TIME for efficiency
            if not nano and all(arg.is_int for arg in [hour, minute, sec]):
                try:
                    h_val = hour.to_py()
                    m_val = minute.to_py()
                    s_val = sec.to_py()
                    if 0 <= h_val <= 23 and 0 <= m_val <= 59 and 0 <= s_val <= 59:
                        return rename_func("MAKE_TIME")(self, expression)
                except ValueError:
                    pass

            # Overflow or nanoseconds detected - use INTERVAL arithmetic
            if nano:
                # nano.pop() detaches the arg so it isn't rendered twice
                sec = sec + nano.pop() / exp.Literal.number(1000000000.0)

            total_seconds = hour * exp.Literal.number(3600) + minute * exp.Literal.number(60) + sec

            return self.sql(
                exp.Add(
                    this=exp.Cast(
                        this=exp.Literal.string("00:00:00"), to=exp.DType.TIME.into_expr()
                    ),
                    expression=exp.Interval(this=total_seconds, unit=exp.var("SECOND")),
                )
            )

        # Default: MAKE_TIME, with nanoseconds merged into the seconds component
        if nano:
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)
2842
    def extract_sql(self, expression: exp.Extract) -> str:
        """
        Transpile EXTRACT/DATE_PART for DuckDB, handling specifiers not natively supported.

        DuckDB doesn't support: WEEKISO, YEAROFWEEK, YEAROFWEEKISO, NANOSECOND,
        EPOCH_SECOND (as integer), EPOCH_MILLISECOND, EPOCH_MICROSECOND, EPOCH_NANOSECOND

        Unsupported parts are rewritten via STRFTIME (EXTRACT_STRFTIME_MAPPINGS) or
        epoch helper functions (EXTRACT_EPOCH_MAPPINGS); everything else falls back
        to the default generator.
        """
        this = expression.this
        datetime_expr = expression.expression

        # TIMESTAMPTZ extractions may produce different results between Snowflake and DuckDB
        # because Snowflake applies server timezone while DuckDB uses local timezone
        if datetime_expr.is_type(exp.DType.TIMESTAMPTZ, exp.DType.TIMESTAMPLTZ):
            self.unsupported(
                "EXTRACT from TIMESTAMPTZ / TIMESTAMPLTZ may produce different results due to timezone handling differences"
            )

        part_name = this.name.upper()

        if part_name in self.EXTRACT_STRFTIME_MAPPINGS:
            # Each mapping gives a strftime format plus the type to cast the result to
            fmt, cast_type = self.EXTRACT_STRFTIME_MAPPINGS[part_name]

            # Problem: strftime doesn't accept TIME and there's no NANOSECOND function
            # So, for NANOSECOND with TIME, fallback to MICROSECOND * 1000
            is_nano_time = part_name == "NANOSECOND" and datetime_expr.is_type(
                exp.DType.TIME, exp.DType.TIMETZ
            )

            if is_nano_time:
                self.unsupported("Parameter NANOSECOND is not supported with TIME type in DuckDB")
                return self.sql(
                    exp.cast(
                        exp.Mul(
                            this=exp.Extract(this=exp.var("MICROSECOND"), expression=datetime_expr),
                            expression=exp.Literal.number(1000),
                        ),
                        exp.DataType.from_str(cast_type, dialect="duckdb"),
                    )
                )

            # For NANOSECOND, cast to TIMESTAMP_NS to preserve nanosecond precision
            strftime_input = datetime_expr
            if part_name == "NANOSECOND":
                strftime_input = exp.cast(datetime_expr, exp.DType.TIMESTAMP_NS)

            return self.sql(
                exp.cast(
                    exp.Anonymous(
                        this="STRFTIME",
                        expressions=[strftime_input, exp.Literal.string(fmt)],
                    ),
                    exp.DataType.from_str(cast_type, dialect="duckdb"),
                )
            )

        if part_name in self.EXTRACT_EPOCH_MAPPINGS:
            func_name = self.EXTRACT_EPOCH_MAPPINGS[part_name]
            result: exp.Expr = exp.Anonymous(this=func_name, expressions=[datetime_expr])
            # EPOCH returns float, cast to BIGINT for integer result
            if part_name == "EPOCH_SECOND":
                result = exp.cast(result, exp.DataType.from_str("BIGINT", dialect="duckdb"))
            return self.sql(result)

        return super().extract_sql(expression)
2907
    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        """
        Transpile TIMESTAMP_FROM_PARTS to DuckDB.

        Two input forms are handled:
        - (date_expr, time_expr): emitted as DATE + TIME, which DuckDB evaluates
          to a TIMESTAMP.
        - (year, month, day, hour, minute, second[, milli][, nano]): emitted via
          MAKE_TIMESTAMP with sub-second parts folded into the seconds argument.
        """
        # Check if this is the date/time expression form: TIMESTAMP_FROM_PARTS(date_expr, time_expr)
        date_expr = expression.this
        time_expr = expression.expression

        if date_expr is not None and time_expr is not None:
            # In DuckDB, DATE + TIME produces TIMESTAMP
            return self.sql(exp.Add(this=date_expr, expression=time_expr))

        # Component-based form: TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second, ...)
        sec = expression.args.get("sec")
        if sec is None:
            # This shouldn't happen with valid input, but handle gracefully
            return rename_func("MAKE_TIMESTAMP")(self, expression)

        # .pop() detaches milli/nano from the tree so MAKE_TIMESTAMP won't render them
        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)
2935
2936    @unsupported_args("nano")
2937    def timestampltzfromparts_sql(self, expression: exp.TimestampLtzFromParts) -> str:
2938        # Pop nano so rename_func only passes args that MAKE_TIMESTAMP accepts
2939        if nano := expression.args.get("nano"):
2940            nano.pop()
2941
2942        timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
2943        return f"CAST({timestamp} AS TIMESTAMPTZ)"
2944
2945    @unsupported_args("nano")
2946    def timestamptzfromparts_sql(self, expression: exp.TimestampTzFromParts) -> str:
2947        # Extract zone before popping
2948        zone = expression.args.get("zone")
2949        # Pop zone and nano so rename_func only passes args that MAKE_TIMESTAMP accepts
2950        if zone:
2951            zone = zone.pop()
2952
2953        if nano := expression.args.get("nano"):
2954            nano.pop()
2955
2956        timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
2957
2958        if zone:
2959            # Use AT TIME ZONE to apply the explicit timezone
2960            return f"{timestamp} AT TIME ZONE {self.sql(zone)}"
2961
2962        return timestamp
2963
    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: str | None = None,
    ) -> str:
        """
        Generate a TABLESAMPLE clause for DuckDB.

        When the sample is not attached directly to a SELECT, the TABLESAMPLE
        keyword is forced so the clause applies only to its single source. A
        discrete sample count only works with reservoir sampling, so any other
        requested method is replaced with RESERVOIR (with a warning).
        """
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
2983
    def join_sql(self, expression: exp.Join) -> str:
        """
        Generate a JOIN, normalizing condition-less joins for DuckDB.

        For a bare (no ON/USING, no method) INNER/OUTER join:
        - joining an UNNEST gets a dummy `ON TRUE` condition, since DuckDB
          requires one there;
        - otherwise the side/kind are stripped so a plain join is emitted.
        """
        if (
            not expression.args.get("using")
            and not expression.args.get("on")
            and not expression.method
            and (expression.kind in ("", "INNER", "OUTER"))
        ):
            # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause
            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
            if isinstance(expression.this, exp.Unnest):
                return super().join_sql(expression.on(exp.true()))

            expression.set("side", None)
            expression.set("kind", None)

        return super().join_sql(expression)
3000
3001    def countif_sql(self, expression: exp.CountIf) -> str:
3002        if self.dialect.version >= (1, 2):
3003            return self.function_fallback_sql(expression)
3004
3005        # https://github.com/tobymao/sqlglot/pull/4749
3006        return count_if_to_sum(self, expression)
3007
    def bracket_sql(self, expression: exp.Bracket) -> str:
        """
        Generate subscript access, accounting for DuckDB 1.2 breaking changes.

        For DuckDB >= 1.2 the default generation is used. For older versions:
        - array literals are parenthesized before indexing;
        - unless `returns_list_for_maps` is set, MAP subscripts are wrapped as
          `(...)[1]` (older DuckDB map subscripts yield a list — see linked
          release notes).
        """
        if self.dialect.version >= (1, 2):
            return super().bracket_sql(expression)

        # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
        this = expression.this
        if isinstance(this, exp.Array):
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            # Type info may be missing; annotate on demand to detect MAP subscripts
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(exp.DType.MAP):
                bracket = f"({bracket})[1]"

        return bracket
3029
    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        """
        Generate a WITHIN GROUP expression for DuckDB.

        ARRAY_AGG is rewritten to carry its ORDER BY inside the call; ordered-set
        aggregates (PERCENTILES) have their order key moved to the first argument
        position, per DuckDB's calling convention. Other functions keep the
        default `func(...) WITHIN GROUP (...)` shape.
        """
        func = expression.this

        # For ARRAY_AGG, DuckDB requires ORDER BY inside the function, not in WITHIN GROUP
        # Transform: ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) -> ARRAY_AGG(x ORDER BY y)
        if isinstance(func, exp.ArrayAgg):
            if not isinstance(order := expression.expression, exp.Order):
                return self.sql(func)

            # Save the original column for FILTER clause (before wrapping with Order)
            original_this = func.this

            # Move ORDER BY inside ARRAY_AGG by wrapping its argument with Order
            # ArrayAgg.this should become Order(this=ArrayAgg.this, expressions=order.expressions)
            func.set(
                "this",
                exp.Order(
                    this=func.this.copy(),
                    expressions=order.expressions,
                ),
            )

            # Generate the ARRAY_AGG function with ORDER BY and add FILTER clause if needed
            # Use original_this (not the Order-wrapped version) for the FILTER condition
            array_agg_sql = self.function_fallback_sql(func)
            return self._add_arrayagg_null_filter(array_agg_sql, func, original_this)

        # For other functions (like PERCENTILES), use existing logic
        expression_sql = self.sql(expression, "expression")

        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        # Strip the function's closing paren so the WITHIN GROUP text can be
        # spliced inside the call before re-closing it
        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"
3071
    def length_sql(self, expression: exp.Length) -> str:
        """
        Generate LENGTH, resolving potentially-binary arguments.

        When the `binary` arg is set and the argument is not a plain string,
        the argument's type is inferred: text types use LENGTH directly, while
        ambiguous values are resolved at runtime with a CASE on TYPEOF so BLOBs
        get a byte length and everything else a character length.
        """
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DType.VARBINARY)
        varchar = exp.cast(arg, exp.DType.VARCHAR)

        case = (
            exp.case(exp.Anonymous(this="TYPEOF", expressions=[arg]))
            .when(exp.Literal.string("BLOB"), exp.ByteLength(this=blob))
            .else_(exp.Anonymous(this="LENGTH", expressions=[varchar]))
        )
        return self.sql(case)
3098
3099    def bitlength_sql(self, expression: exp.BitLength) -> str:
3100        if not _is_binary(arg := expression.this):
3101            return self.func("BIT_LENGTH", arg)
3102
3103        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
3104        return self.sql(exp.ByteLength(this=blob) * exp.Literal.number(8))
3105
3106    def chr_sql(self, expression: exp.Chr, name: str = "CHR") -> str:
3107        arg = expression.expressions[0]
3108        if arg.is_type(*exp.DataType.REAL_TYPES):
3109            arg = exp.cast(arg, exp.DType.INT)
3110        return self.func("CHR", arg)
3111
    def collation_sql(self, expression: exp.Collation) -> str:
        """DuckDB has no COLLATION function; warn and emit a generic function call."""
        self.unsupported("COLLATION function is not supported by DuckDB")
        return self.function_fallback_sql(expression)
3115
    def collate_sql(self, expression: exp.Collate) -> str:
        """
        Transpile COLLATE with a Snowflake-style collation string to DuckDB.

        Snowflake collation specifiers are dash-separated (e.g. 'en-ci'). Parts
        matching Snowflake defaults are dropped; parts with no DuckDB equivalent
        trigger an unsupported warning but are still kept. Remaining parts are
        joined with '.' to form the DuckDB collation name. If nothing remains,
        the COLLATE clause is dropped entirely.
        """
        if not expression.expression.is_string:
            return super().collate_sql(expression)

        raw = expression.expression.name
        if not raw:
            # Empty collation string: emit just the collated expression
            return self.sql(expression.this)

        parts = []
        for part in raw.split("-"):
            lower = part.lower()
            if lower not in _SNOWFLAKE_COLLATION_DEFAULTS:
                if lower in _SNOWFLAKE_COLLATION_UNSUPPORTED:
                    self.unsupported(
                        f"Snowflake collation specifier '{part}' has no DuckDB equivalent"
                    )
                parts.append(lower)

        if not parts:
            return self.sql(expression.this)
        return super().collate_sql(
            exp.Collate(this=expression.this, expression=exp.var(".".join(parts)))
        )
3139
3140    def _validate_regexp_flags(self, flags: exp.Expr | None, supported_flags: str) -> str | None:
3141        """
3142        Validate and filter regexp flags for DuckDB compatibility.
3143
3144        Args:
3145            flags: The flags expression to validate
3146            supported_flags: String of supported flags (e.g., "ims", "cims").
3147                            Only these flags will be returned.
3148
3149        Returns:
3150            Validated/filtered flag string, or None if no valid flags remain
3151        """
3152        if not isinstance(flags, exp.Expr):
3153            return None
3154
3155        if not flags.is_string:
3156            self.unsupported("Non-literal regexp flags are not fully supported in DuckDB")
3157            return None
3158
3159        flag_str = flags.this
3160        unsupported = set(flag_str) - set(supported_flags)
3161
3162        if unsupported:
3163            self.unsupported(
3164                f"Regexp flags {sorted(unsupported)} are not supported in this context"
3165            )
3166
3167        flag_str = "".join(f for f in flag_str if f in supported_flags)
3168        return flag_str if flag_str else None
3169
    def regexpcount_sql(self, expression: exp.RegexpCount) -> str:
        """
        Transpile REGEXP_COUNT to DuckDB.

        DuckDB has no REGEXP_COUNT, so the count is computed as
        LENGTH(REGEXP_EXTRACT_ALL(subject, pattern)). The optional start position
        is applied with SUBSTRING, flags are embedded inline as `(?flags)`, and an
        empty pattern short-circuits to 0 to match the source dialect's behavior.
        """
        this = expression.this
        pattern = expression.expression
        position = expression.args.get("position")
        parameters = expression.args.get("parameters")

        # Validate flags - only "ims" flags are supported for embedded patterns
        validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims")

        if position:
            this = exp.Substring(this=this, start=position)

        # Embed flags in pattern (REGEXP_EXTRACT_ALL doesn't support flags argument)
        if validated_flags:
            pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern])

        # Handle empty pattern: Snowflake returns 0, DuckDB would match between every character
        result = (
            exp.case()
            .when(
                exp.EQ(this=pattern, expression=exp.Literal.string("")),
                exp.Literal.number(0),
            )
            .else_(
                exp.Length(
                    this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern])
                )
            )
        )

        return self.sql(result)
3201
    def regexpreplace_sql(self, expression: exp.RegexpReplace) -> str:
        """
        Transpile REGEXP_REPLACE to DuckDB.

        Supports literal `occurrence` (0 = replace all, mapped to the "g" flag;
        values > 1 are unsupported) and literal `position` (> 1 splits the
        subject with SUBSTRING and re-concatenates the untouched prefix).
        Non-literal occurrence/position trigger unsupported warnings.
        """
        subject = expression.this
        pattern = expression.expression
        replacement = expression.args.get("replacement") or exp.Literal.string("")
        position = expression.args.get("position")
        occurrence = expression.args.get("occurrence")
        modifiers = expression.args.get("modifiers")

        validated_flags = self._validate_regexp_flags(modifiers, supported_flags="cimsg") or ""

        # Handle occurrence (only literals supported)
        if occurrence and not occurrence.is_int:
            self.unsupported("REGEXP_REPLACE with non-literal occurrence")
        else:
            occurrence = occurrence.to_py() if occurrence and occurrence.is_int else 0
            if occurrence > 1:
                self.unsupported(f"REGEXP_REPLACE occurrence={occurrence} not supported")
            # flag duckdb to do either all or none, single_replace check is for duckdb round trip
            elif (
                occurrence == 0
                and "g" not in validated_flags
                and not expression.args.get("single_replace")
            ):
                validated_flags += "g"

        # Handle position (only literals supported)
        prefix = None
        if position and not position.is_int:
            self.unsupported("REGEXP_REPLACE with non-literal position")
        elif position and position.is_int and position.to_py() > 1:
            # Replace only from `pos` onward; keep the untouched prefix for re-concatenation
            pos = position.to_py()
            prefix = exp.Substring(
                this=subject, start=exp.Literal.number(1), length=exp.Literal.number(pos - 1)
            )
            subject = exp.Substring(this=subject, start=exp.Literal.number(pos))

        result: exp.Expr = exp.Anonymous(
            this="REGEXP_REPLACE",
            expressions=[
                subject,
                pattern,
                replacement,
                exp.Literal.string(validated_flags) if validated_flags else None,
            ],
        )

        if prefix:
            result = exp.Concat(expressions=[prefix, result])

        return self.sql(result)
3252
3253    def regexplike_sql(self, expression: exp.RegexpLike) -> str:
3254        this = expression.this
3255        pattern = expression.expression
3256        flag = expression.args.get("flag")
3257
3258        if expression.args.get("full_match"):
3259            validated_flags = self._validate_regexp_flags(flag, supported_flags="cims")
3260            flag = exp.Literal.string(validated_flags) if validated_flags else None
3261            return self.func("REGEXP_FULL_MATCH", this, pattern, flag)
3262
3263        return self.func("REGEXP_MATCHES", this, pattern, flag)
3264
3265    @unsupported_args("ins_cost", "del_cost", "sub_cost")
3266    def levenshtein_sql(self, expression: exp.Levenshtein) -> str:
3267        this = expression.this
3268        expr = expression.expression
3269        max_dist = expression.args.get("max_dist")
3270
3271        if max_dist is None:
3272            return self.func("LEVENSHTEIN", this, expr)
3273
3274        # Emulate Snowflake semantics: if distance > max_dist, return max_dist
3275        levenshtein = exp.Levenshtein(this=this, expression=expr)
3276        return self.sql(exp.Least(this=levenshtein, expressions=[max_dist]))
3277
    def pad_sql(self, expression: exp.Pad) -> str:
        """
        Handle RPAD/LPAD for VARCHAR and BINARY types.

        For VARCHAR: Delegate to parent class
        For BINARY: Lower to: input || REPEAT(pad, GREATEST(0, target_len - OCTET_LENGTH(input)))
        """
        string_arg = expression.this
        fill_arg = expression.args.get("fill_pattern") or exp.Literal.string(" ")

        if _is_binary(string_arg) or _is_binary(fill_arg):
            length_arg = expression.expression
            is_left = expression.args.get("is_left")

            # GREATEST(0, ...) clamps so an already-long input isn't padded negatively
            input_len = exp.ByteLength(this=string_arg)
            chars_needed = length_arg - input_len
            pad_count = exp.Greatest(
                this=exp.Literal.number(0), expressions=[chars_needed], ignore_nulls=True
            )
            repeat_expr = exp.Repeat(this=fill_arg, times=pad_count)

            # LPAD puts the padding before the input; RPAD after
            left, right = string_arg, repeat_expr
            if is_left:
                left, right = right, left

            result = exp.DPipe(this=left, expression=right)
            return self.sql(result)

        # For VARCHAR: Delegate to parent class (handles PAD_FILL_PATTERN_IS_REQUIRED)
        return super().pad_sql(expression)
3308
3309    def minhash_sql(self, expression: exp.Minhash) -> str:
3310        k = expression.this
3311        exprs = expression.expressions
3312
3313        if len(exprs) != 1 or isinstance(exprs[0], exp.Star):
3314            self.unsupported(
3315                "MINHASH with multiple expressions or * requires manual query restructuring"
3316            )
3317            return self.func("MINHASH", k, *exprs)
3318
3319        expr = exprs[0]
3320        result = exp.replace_placeholders(self.MINHASH_TEMPLATE.copy(), expr=expr, k=k)
3321        return f"({self.sql(result)})"
3322
3323    def minhashcombine_sql(self, expression: exp.MinhashCombine) -> str:
3324        expr = expression.this
3325        result = exp.replace_placeholders(self.MINHASH_COMBINE_TEMPLATE.copy(), expr=expr)
3326        return f"({self.sql(result)})"
3327
3328    def approximatesimilarity_sql(self, expression: exp.ApproximateSimilarity) -> str:
3329        expr = expression.this
3330        result = exp.replace_placeholders(self.APPROXIMATE_SIMILARITY_TEMPLATE.copy(), expr=expr)
3331        return f"({self.sql(result)})"
3332
3333    def arrayuniqueagg_sql(self, expression: exp.ArrayUniqueAgg) -> str:
3334        return self.sql(
3335            exp.Filter(
3336                this=exp.func("LIST", exp.Distinct(expressions=[expression.this])),
3337                expression=exp.Where(this=expression.this.copy().is_(exp.null()).not_()),
3338            )
3339        )
3340
    def arrayunionagg_sql(self, expression: exp.ArrayUnionAgg) -> str:
        """ARRAY_UNION_AGG has no DuckDB equivalent; warn and emit a generic call."""
        self.unsupported("ARRAY_UNION_AGG is not supported in DuckDB")
        return self.function_fallback_sql(expression)
3344
    def arraydistinct_sql(self, expression: exp.ArrayDistinct) -> str:
        """
        Generate LIST_DISTINCT, optionally preserving a NULL element.

        With the `check_null` arg set, an IF expression detects whether the input
        contains NULLs (ARRAY_SIZE vs LIST_COUNT mismatch — LIST_COUNT appears to
        skip NULLs in DuckDB; confirm against the DuckDB docs) and, if so,
        appends a single NULL to the distinct list of the compacted array.
        """
        arr = expression.this
        func = self.func("LIST_DISTINCT", arr)

        if expression.args.get("check_null"):
            # Distinct over the NULL-free array, then re-append one NULL marker
            add_null_to_array = exp.func(
                "LIST_APPEND", exp.func("LIST_DISTINCT", exp.ArrayCompact(this=arr)), exp.Null()
            )
            return self.sql(
                exp.If(
                    this=exp.NEQ(
                        this=exp.ArraySize(this=arr), expression=exp.func("LIST_COUNT", arr)
                    ),
                    true=add_null_to_array,
                    false=func,
                )
            )

        return func
3364
3365    def arrayintersect_sql(self, expression: exp.ArrayIntersect) -> str:
3366        if expression.args.get("is_multiset") and len(expression.expressions) == 2:
3367            return self._array_bag_sql(
3368                self.ARRAY_INTERSECTION_CONDITION,
3369                expression.expressions[0],
3370                expression.expressions[1],
3371            )
3372        return self.function_fallback_sql(expression)
3373
3374    def arrayexcept_sql(self, expression: exp.ArrayExcept) -> str:
3375        arr1, arr2 = expression.this, expression.expression
3376        if expression.args.get("is_multiset"):
3377            return self._array_bag_sql(self.ARRAY_EXCEPT_CONDITION, arr1, arr2)
3378        return self.sql(
3379            exp.replace_placeholders(self.ARRAY_EXCEPT_SET_TEMPLATE, arr1=arr1, arr2=arr2)
3380        )
3381
    def arrayslice_sql(self, expression: exp.ArraySlice) -> str:
        """
        Transpiles Snowflake's ARRAY_SLICE (0-indexed, exclusive end) to DuckDB's
        ARRAY_SLICE (1-indexed, inclusive end) by wrapping start and end in CASE
        expressions that adjust the index at query time:
          - start: CASE WHEN start >= 0 THEN start + 1 ELSE start END
          - end:   CASE WHEN end < 0 THEN end - 1 ELSE end END
        """
        start, end = expression.args.get("start"), expression.args.get("end")

        # Only adjust when the source indices were 0-based
        if expression.args.get("zero_based"):
            if start is not None:
                # Non-negative starts shift up by one; negative starts are kept as-is
                start = (
                    exp.case()
                    .when(
                        exp.GTE(this=start.copy(), expression=exp.Literal.number(0)),
                        exp.Add(this=start.copy(), expression=exp.Literal.number(1)),
                    )
                    .else_(start)
                )
            if end is not None:
                # Negative ends shift down by one to stay exclusive; others unchanged
                end = (
                    exp.case()
                    .when(
                        exp.LT(this=end.copy(), expression=exp.Literal.number(0)),
                        exp.Sub(this=end.copy(), expression=exp.Literal.number(1)),
                    )
                    .else_(end)
                )

        return self.func("ARRAY_SLICE", expression.this, start, end, expression.args.get("step"))
3413
    def arrayszip_sql(self, expression: exp.ArraysZip) -> str:
        """
        Transpile ARRAYS_ZIP to DuckDB via ARRAYS_ZIP_TEMPLATE.

        Builds the template's placeholder expressions: a NULL check across all
        input arrays, an all-empty check, the empty struct used for padding
        ({'$1': NULL, ...}), the maximum length, and the per-index struct that
        pulls element i+1 from each (COALESCE-wrapped) array. No-argument calls
        emit [MAP([], [])] since DuckDB can't represent an empty struct.
        """
        args = expression.expressions

        if not args:
            # Return [{}] - using MAP([], []) since DuckDB can't represent empty structs
            return self.sql(exp.array(exp.Map(keys=exp.array(), values=exp.array())))

        # Build placeholder values for template
        lengths = [exp.Length(this=arg) for arg in args]
        max_len = (
            lengths[0]
            if len(lengths) == 1
            else exp.Greatest(this=lengths[0], expressions=lengths[1:])
        )

        # Empty struct with same schema: {'$1': NULL, '$2': NULL, ...}
        empty_struct = exp.func(
            "STRUCT",
            *[
                exp.PropertyEQ(this=exp.Literal.string(f"${i + 1}"), expression=exp.Null())
                for i in range(len(args))
            ],
        )

        # Struct for transform: {'$1': COALESCE(arr1, [])[__i + 1], ...}
        # COALESCE wrapping handles NULL arrays - prevents invalid NULL[i] syntax
        index = exp.column("__i") + 1
        transform_struct = exp.func(
            "STRUCT",
            *[
                exp.PropertyEQ(
                    this=exp.Literal.string(f"${i + 1}"),
                    expression=exp.func("COALESCE", arg, exp.array())[index],
                )
                for i, arg in enumerate(args)
            ],
        )

        result = exp.replace_placeholders(
            self.ARRAYS_ZIP_TEMPLATE.copy(),
            null_check=exp.or_(*[arg.is_(exp.Null()) for arg in args]),
            all_empty_check=exp.and_(
                *[
                    exp.EQ(this=exp.Length(this=arg), expression=exp.Literal.number(0))
                    for arg in args
                ]
            ),
            empty_struct=empty_struct,
            max_len=max_len,
            transform_struct=transform_struct,
        )
        return self.sql(result)
3466
    def lower_sql(self, expression: exp.Lower) -> str:
        """Render LOWER(), coercing the operand to VARCHAR first.

        NOTE(review): `_cast_to_varchar` / `_gen_with_cast_to_blob` are module
        helpers defined elsewhere in this file; they appear to round-trip BLOB
        operands through VARCHAR and back — confirm against their definitions.
        """
        result_sql = self.func("LOWER", _cast_to_varchar(expression.this))
        return _gen_with_cast_to_blob(self, expression, result_sql)
3470
    def upper_sql(self, expression: exp.Upper) -> str:
        """Render UPPER(), coercing the operand to VARCHAR first (see `lower_sql`)."""
        result_sql = self.func("UPPER", _cast_to_varchar(expression.this))
        return _gen_with_cast_to_blob(self, expression, result_sql)
3474
    def reverse_sql(self, expression: exp.Reverse) -> str:
        """Render REVERSE(), coercing the operand to VARCHAR first (see `lower_sql`)."""
        result_sql = self.func("REVERSE", _cast_to_varchar(expression.this))
        return _gen_with_cast_to_blob(self, expression, result_sql)
3478
3479    def _left_right_sql(self, expression: exp.Left | exp.Right, func_name: str) -> str:
3480        arg = expression.this
3481        length = expression.expression
3482        is_binary = _is_binary(arg)
3483
3484        if is_binary:
3485            # LEFT/RIGHT(blob, n) becomes UNHEX(LEFT/RIGHT(HEX(blob), n * 2))
3486            # Each byte becomes 2 hex chars, so multiply length by 2
3487            hex_arg = exp.Hex(this=arg)
3488            hex_length = exp.Mul(this=length, expression=exp.Literal.number(2))
3489            result: exp.Expression = exp.Unhex(
3490                this=exp.Anonymous(this=func_name, expressions=[hex_arg, hex_length])
3491            )
3492        else:
3493            result = exp.Anonymous(this=func_name, expressions=[arg, length])
3494
3495        if expression.args.get("negative_length_returns_empty"):
3496            empty: exp.Expression = exp.Literal.string("")
3497            if is_binary:
3498                empty = exp.Unhex(this=empty)
3499            result = exp.case().when(length < exp.Literal.number(0), empty).else_(result)
3500
3501        return self.sql(result)
3502
    def left_sql(self, expression: exp.Left) -> str:
        # Delegate to the shared LEFT/RIGHT renderer (handles BLOB inputs).
        return self._left_right_sql(expression, "LEFT")
3505
    def right_sql(self, expression: exp.Right) -> str:
        # Delegate to the shared LEFT/RIGHT renderer (handles BLOB inputs).
        return self._left_right_sql(expression, "RIGHT")
3508
3509    def rtrimmedlength_sql(self, expression: exp.RtrimmedLength) -> str:
3510        return self.func("LENGTH", exp.Trim(this=expression.this, position="TRAILING"))
3511
    def stuff_sql(self, expression: exp.Stuff) -> str:
        """Render STUFF/INSERT as left-part || insertion || right-part.

        Splits the base string at `start`, drops `length` characters, and
        concatenates the insertion in between. Binary inputs are processed on
        their HEX representation (2 chars per byte) and UNHEX'd back to BLOB.
        """
        base = expression.this
        start = expression.args["start"]
        length = expression.args["length"]
        insertion = expression.expression
        is_binary = _is_binary(base)

        if is_binary:
            # DuckDB's SUBSTRING doesn't accept BLOB; operate on the HEX string instead
            # (each byte = 2 hex chars), then UNHEX back to BLOB
            base = exp.Hex(this=base)
            insertion = exp.Hex(this=insertion)
            # Hex prefix: (start - 1) characters of the original = 2x hex chars
            left = exp.Substring(
                this=base.copy(),
                start=exp.Literal.number(1),
                length=(start.copy() - exp.Literal.number(1)) * exp.Literal.number(2),
            )
            # Hex suffix: everything after the removed span, in hex coordinates
            right = exp.Substring(
                this=base.copy(),
                start=((start + length) - exp.Literal.number(1)) * exp.Literal.number(2)
                + exp.Literal.number(1),
            )
        else:
            left = exp.Substring(
                this=base.copy(),
                start=exp.Literal.number(1),
                length=start.copy() - exp.Literal.number(1),
            )
            right = exp.Substring(this=base.copy(), start=start + length)
        result: exp.Expr = exp.DPipe(
            this=exp.DPipe(this=left, expression=insertion), expression=right
        )

        if is_binary:
            # Decode the concatenated hex string back to BLOB
            result = exp.Unhex(this=result)

        return self.sql(result)
3549
    def rand_sql(self, expression: exp.Rand) -> str:
        """Render RANDOM(), optionally scaled into a [lower, upper) range.

        Seeded RANDOM is flagged as unsupported (DuckDB's RANDOM takes no
        seed); the seed argument is then ignored.
        """
        seed = expression.this
        if seed is not None:
            self.unsupported("RANDOM with seed is not supported in DuckDB")

        lower = expression.args.get("lower")
        upper = expression.args.get("upper")

        if lower and upper:
            # scale DuckDB's [0,1) to the specified range
            range_size = exp.paren(upper - lower)
            scaled = exp.Add(this=lower, expression=exp.func("random") * range_size)

            # For now we assume that if bounds are set, the return type is BIGINT,
            # matching Snowflake/Teradata semantics.
            result = exp.cast(scaled, exp.DType.BIGINT)
            return self.sql(result)

        # Default DuckDB behavior - just return RANDOM() as float
        return "RANDOM()"
3569
3570    def bytelength_sql(self, expression: exp.ByteLength) -> str:
3571        arg = expression.this
3572
3573        # Check if it's a text type (handles both literals and annotated expressions)
3574        if arg.is_type(*exp.DataType.TEXT_TYPES):
3575            return self.func("OCTET_LENGTH", exp.Encode(this=arg))
3576
3577        # Default: pass through as-is (conservative for DuckDB, handles binary and unannotated)
3578        return self.func("OCTET_LENGTH", arg)
3579
    def base64encode_sql(self, expression: exp.Base64Encode) -> str:
        """Render BASE64_ENCODE as TO_BASE64, emulating alphabet/line-length args.

        Custom alphabets are emulated via character replacements; a positive
        `max_line_length` is emulated by inserting a newline after every N
        output characters with REGEXP_REPLACE, trimming the trailing newline.
        """
        # DuckDB TO_BASE64 requires BLOB input
        # Snowflake BASE64_ENCODE accepts both VARCHAR and BINARY - for VARCHAR it implicitly
        # encodes UTF-8 bytes. We add ENCODE unless the input is a binary type.
        result = expression.this

        # Check if input is a string type - ENCODE only accepts VARCHAR
        if result.is_type(*exp.DataType.TEXT_TYPES):
            result = exp.Encode(this=result)

        result = exp.ToBase64(this=result)

        max_line_length = expression.args.get("max_line_length")
        alphabet = expression.args.get("alphabet")

        # Handle custom alphabet by replacing standard chars with custom ones
        result = _apply_base64_alphabet_replacements(result, alphabet)

        # Handle max_line_length by inserting newlines every N characters.
        # Only constant integer literals are honored; anything else means 0 (no wrapping).
        line_length = (
            t.cast(int, max_line_length.to_py())
            if isinstance(max_line_length, exp.Literal) and max_line_length.is_number
            else 0
        )
        if line_length > 0:
            newline = exp.Chr(expressions=[exp.Literal.number(10)])
            result = exp.Trim(
                this=exp.RegexpReplace(
                    this=result,
                    expression=exp.Literal.string(f"(.{{{line_length}}})"),
                    replacement=exp.Concat(expressions=[exp.Literal.string("\\1"), newline.copy()]),
                ),
                expression=newline,
                position="TRAILING",
            )

        return self.sql(result)
3617
    def hex_sql(self, expression: exp.Hex) -> str:
        """Render HEX, honoring an optional `case` selector argument.

        Without `case`, plain HEX(...) is emitted. With it, a CASE expression
        maps: NULL -> NULL, 0 -> lowercase hex, anything else -> default
        (uppercase) hex.
        """
        case = expression.args.get("case")

        if not case:
            return self.func("HEX", expression.this)

        hex_expr = exp.Hex(this=expression.this)
        # .copy() guards against re-parenting the same `case` node twice
        return self.sql(
            exp.case()
            .when(case.is_(exp.null()), exp.null())
            .when(case.copy().eq(0), exp.Lower(this=hex_expr.copy()))
            .else_(hex_expr)
        )
3631
    def replace_sql(self, expression: exp.Replace) -> str:
        """Render REPLACE(), coercing all three operands to VARCHAR (see `lower_sql`)."""
        result_sql = self.func(
            "REPLACE",
            _cast_to_varchar(expression.this),
            _cast_to_varchar(expression.expression),
            _cast_to_varchar(expression.args.get("replacement")),
        )
        return _gen_with_cast_to_blob(self, expression, result_sql)
3640
    def _bitwise_op(self, expression: exp.Binary, op: str) -> str:
        """Shared renderer for infix bitwise operators.

        NOTE(review): `_prepare_binary_bitwise_args` mutates the operands in
        place (module helper defined elsewhere in this file — presumably
        coercing BLOB operands); the result may be cast back to BLOB.
        """
        _prepare_binary_bitwise_args(expression)
        result_sql = self.binary(expression, op)
        return _gen_with_cast_to_blob(self, expression, result_sql)
3645
    def bitwisexor_sql(self, expression: exp.BitwiseXor) -> str:
        # XOR is rendered as DuckDB's XOR() function rather than an infix
        # operator; operands are prepared the same way as in `_bitwise_op`.
        _prepare_binary_bitwise_args(expression)
        result_sql = self.func("XOR", expression.this, expression.expression)
        return _gen_with_cast_to_blob(self, expression, result_sql)
3650
3651    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
3652        this = expression.this
3653        key = expression.args.get("key")
3654        key_sql = key.name if isinstance(key, exp.Expr) else ""
3655        value_sql = self.sql(expression, "value")
3656
3657        kv_sql = f"{key_sql} := {value_sql}"
3658
3659        # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
3660        # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
3661        if isinstance(this, exp.Struct) and not this.expressions:
3662            return self.func("STRUCT_PACK", kv_sql)
3663
3664        return self.func("STRUCT_INSERT", this, kv_sql)
3665
3666    def mapcat_sql(self, expression: exp.MapCat) -> str:
3667        result = exp.replace_placeholders(
3668            self.MAPCAT_TEMPLATE.copy(),
3669            map1=expression.this,
3670            map2=expression.expression,
3671        )
3672        return self.sql(result)
3673
    def mapcontainskey_sql(self, expression: exp.MapContainsKey) -> str:
        """Render MAP_CONTAINS_KEY as ARRAY_CONTAINS(MAP_KEYS(map), key).

        NOTE(review): `args["key"]` is fed to MAP_KEYS and `this` is the
        searched element — implying `this` holds the key and `args["key"]`
        the map (consistent with Snowflake's MAP_CONTAINS_KEY(key, map)
        argument order). TODO confirm against the parser's arg mapping.
        """
        return self.func(
            "ARRAY_CONTAINS", exp.func("MAP_KEYS", expression.args["key"]), expression.this
        )
3678
3679    def mapdelete_sql(self, expression: exp.MapDelete) -> str:
3680        map_arg = expression.this
3681        keys_to_delete = expression.expressions
3682
3683        x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key"))
3684
3685        lambda_expr = exp.Lambda(
3686            this=exp.In(this=x_dot_key, expressions=keys_to_delete).not_(),
3687            expressions=[exp.to_identifier("x")],
3688        )
3689        result = exp.func(
3690            "MAP_FROM_ENTRIES",
3691            exp.ArrayFilter(this=exp.func("MAP_ENTRIES", map_arg), expression=lambda_expr),
3692        )
3693        return self.sql(result)
3694
3695    def mappick_sql(self, expression: exp.MapPick) -> str:
3696        map_arg = expression.this
3697        keys_to_pick = expression.expressions
3698
3699        x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key"))
3700
3701        if len(keys_to_pick) == 1 and keys_to_pick[0].is_type(exp.DType.ARRAY):
3702            lambda_expr = exp.Lambda(
3703                this=exp.func("ARRAY_CONTAINS", keys_to_pick[0], x_dot_key),
3704                expressions=[exp.to_identifier("x")],
3705            )
3706        else:
3707            lambda_expr = exp.Lambda(
3708                this=exp.In(this=x_dot_key, expressions=keys_to_pick),
3709                expressions=[exp.to_identifier("x")],
3710            )
3711
3712        result = exp.func(
3713            "MAP_FROM_ENTRIES",
3714            exp.func("LIST_FILTER", exp.func("MAP_ENTRIES", map_arg), lambda_expr),
3715        )
3716        return self.sql(result)
3717
    def mapsize_sql(self, expression: exp.MapSize) -> str:
        # Entry count of a map is CARDINALITY in DuckDB.
        return self.func("CARDINALITY", expression.this)
3720
    @unsupported_args("update_flag")
    def mapinsert_sql(self, expression: exp.MapInsert) -> str:
        """Render MAP_INSERT as MAP_CONCAT(map, {key: value}).

        MAP_CONCAT naturally covers both insert and update (last map wins).
        When the map's declared type exposes a value type, the new value is
        cast to it to avoid type conflicts; the `update_flag` argument is
        unsupported (see decorator).
        """
        map_arg = expression.this
        key = expression.args.get("key")
        value = expression.args.get("value")

        map_type = map_arg.type

        if value is not None:
            if map_type and map_type.expressions and len(map_type.expressions) > 1:
                # Extract the value type from MAP(key_type, value_type)
                value_type = map_type.expressions[1]
                # Cast value to match the map's value type to avoid type conflicts
                value = exp.cast(value, value_type)
            # else: polymorphic MAP case - no type parameters available, use value as-is

        # Create a single-entry map for the new key-value pair
        new_entry_struct = exp.Struct(expressions=[exp.PropertyEQ(this=key, expression=value)])
        new_entry: exp.Expression = exp.ToMap(this=new_entry_struct)

        # Use MAP_CONCAT to merge the original map with the new entry
        # This automatically handles both insert and update cases
        result = exp.func("MAP_CONCAT", map_arg, new_entry)

        return self.sql(result)
3746
    def startswith_sql(self, expression: exp.StartsWith) -> str:
        """Render STARTS_WITH, coercing both operands to VARCHAR (see `lower_sql`)."""
        return self.func(
            "STARTS_WITH",
            _cast_to_varchar(expression.this),
            _cast_to_varchar(expression.expression),
        )
3753
3754    def space_sql(self, expression: exp.Space) -> str:
3755        # DuckDB's REPEAT requires BIGINT for the count parameter
3756        return self.sql(
3757            exp.Repeat(
3758                this=exp.Literal.string(" "),
3759                times=exp.cast(expression.this, exp.DType.BIGINT),
3760            )
3761        )
3762
    def tablefromrows_sql(self, expression: exp.TableFromRows) -> str:
        """Render TABLE(...) sources, unwrapping GENERATOR into a plain table.

        For a GENERATOR argument the TABLE() wrapper is dropped and the
        Generator is emitted directly (it becomes RANGE downstream); any other
        argument falls through to the base implementation.
        """
        # For GENERATOR, unwrap TABLE() - just emit the Generator (becomes RANGE)
        if isinstance(expression.this, exp.Generator):
            # Preserve alias, joins, and other table-level args
            table = exp.Table(
                this=expression.this,
                alias=expression.args.get("alias"),
                joins=expression.args.get("joins"),
            )
            return self.sql(table)

        return super().tablefromrows_sql(expression)
3775
    def unnest_sql(self, expression: exp.Unnest) -> str:
        """Render UNNEST, emulating BigQuery's nested-array explosion.

        When `explode_array` is set, `FROM UNNEST(...)` is rewritten to
        `FROM (SELECT UNNEST(..., max_depth => 2))` and a column alias is
        converted into a table alias on the subquery. Note this mutates the
        expression node in place (appends the kwarg, pops the alias).
        """
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if isinstance(alias, exp.TableAlias):
                expression.set("alias", None)
                if alias.columns:
                    alias = exp.TableAlias(this=seq_get(alias.columns, 0))

            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)
3797
    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
        """Render IGNORE NULLS only where DuckDB accepts it.

        Window functions listed in IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS keep
        the clause; FIRST is rewritten to ANY_VALUE (which skips NULLs);
        anything else drops the clause with an unsupported warning.
        """
        this = expression.this

        if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
            # DuckDB should render IGNORE NULLS only for the general-purpose
            # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
            return super().ignorenulls_sql(expression)

        if isinstance(this, exp.First):
            this = exp.AnyValue(this=this.this)

        if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)):
            self.unsupported("IGNORE NULLS is not supported for non-window functions.")

        # Emit the inner function without the IGNORE NULLS wrapper
        return self.sql(this)
3813
3814    def split_sql(self, expression: exp.Split) -> str:
3815        base_func = exp.func("STR_SPLIT", expression.this, expression.expression)
3816
3817        case_expr = exp.case().else_(base_func)
3818        needs_case = False
3819
3820        if expression.args.get("null_returns_null"):
3821            case_expr = case_expr.when(expression.expression.is_(exp.null()), exp.null())
3822            needs_case = True
3823
3824        if expression.args.get("empty_delimiter_returns_whole"):
3825            # When delimiter is empty string, return input string as single array element
3826            array_with_input = exp.array(expression.this)
3827            case_expr = case_expr.when(
3828                expression.expression.eq(exp.Literal.string("")), array_with_input
3829            )
3830            needs_case = True
3831
3832        return self.sql(case_expr if needs_case else base_func)
3833
    def splitpart_sql(self, expression: exp.SplitPart) -> str:
        """Render SPLIT_PART, emulating Snowflake's index-0 and empty-delimiter quirks.

        `part_index_zero_as_one` maps index 0 to 1; `empty_delimiter_returns_whole`
        returns the whole string for index 1/-1 (else '') when the delimiter is
        empty. Falls back to generic function rendering if either required
        argument is missing.
        """
        string_arg = expression.this
        delimiter_arg = expression.args.get("delimiter")
        part_index_arg = expression.args.get("part_index")

        if delimiter_arg and part_index_arg:
            # Handle Snowflake's "index 0 and 1 both return first element" behavior
            if expression.args.get("part_index_zero_as_one"):
                # Convert 0 to 1 for compatibility

                part_index_arg = exp.Paren(
                    this=exp.case()
                    .when(part_index_arg.eq(exp.Literal.number("0")), exp.Literal.number("1"))
                    .else_(part_index_arg)
                )

            # Use Anonymous to avoid recursion
            base_func_expr: exp.Expr = exp.Anonymous(
                this="SPLIT_PART", expressions=[string_arg, delimiter_arg, part_index_arg]
            )
            needs_case_transform = False
            case_expr = exp.case().else_(base_func_expr)

            if expression.args.get("empty_delimiter_returns_whole"):
                # When delimiter is empty string:
                # - Return whole string if part_index is 1 or -1
                # - Return empty string otherwise
                empty_case = exp.Paren(
                    this=exp.case()
                    .when(
                        exp.or_(
                            part_index_arg.eq(exp.Literal.number("1")),
                            part_index_arg.eq(exp.Literal.number("-1")),
                        ),
                        string_arg,
                    )
                    .else_(exp.Literal.string(""))
                )

                case_expr = case_expr.when(delimiter_arg.eq(exp.Literal.string("")), empty_case)
                needs_case_transform = True

            """
            Output looks something like this:

            CASE
            WHEN delimiter is '' THEN
                (
                    CASE
                    WHEN adjusted_part_index = 1 OR adjusted_part_index = -1 THEN input
                    ELSE '' END
                )
            ELSE SPLIT_PART(input, delimiter, adjusted_part_index)
            END

            """
            return self.sql(case_expr if needs_case_transform else base_func_expr)

        return self.function_fallback_sql(expression)
3893
3894    def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
3895        if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
3896            # DuckDB should render RESPECT NULLS only for the general-purpose
3897            # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
3898            return super().respectnulls_sql(expression)
3899
3900        self.unsupported("RESPECT NULLS is not supported for non-window functions.")
3901        return self.sql(expression, "this")
3902
    def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
        """Render ARRAY_TO_STRING, emulating NULL-element handling flags.

        `null_is_empty` maps NULL elements to '' via a LIST transform (and,
        with `null_delim_is_null`, returns NULL for a NULL delimiter);
        a `null` replacement argument COALESCEs each element to it.
        """
        null = expression.args.get("null")

        if expression.args.get("null_is_empty"):
            x = exp.to_identifier("x")
            # x -> COALESCE(CAST(x AS TEXT), '') so NULL elements join as ''
            list_transform = exp.Transform(
                this=expression.this.copy(),
                expression=exp.Lambda(
                    this=exp.Coalesce(
                        this=exp.cast(x, "TEXT"), expressions=[exp.Literal.string("")]
                    ),
                    expressions=[x],
                ),
            )
            array_to_string = exp.ArrayToString(
                this=list_transform, expression=expression.expression
            )
            if expression.args.get("null_delim_is_null"):
                # A NULL delimiter propagates NULL instead of joining
                return self.sql(
                    exp.case()
                    .when(expression.expression.copy().is_(exp.null()), exp.null())
                    .else_(array_to_string)
                )
            return self.sql(array_to_string)

        if null:
            x = exp.to_identifier("x")
            # Substitute the provided replacement for NULL elements
            return self.sql(
                exp.ArrayToString(
                    this=exp.Transform(
                        this=expression.this,
                        expression=exp.Lambda(
                            this=exp.Coalesce(this=x, expressions=[null]),
                            expressions=[x],
                        ),
                    ),
                    expression=expression.expression,
                )
            )

        return self.func("ARRAY_TO_STRING", expression.this, expression.expression)
3944
3945    def concatws_sql(self, expression: exp.ConcatWs) -> str:
3946        # DuckDB-specific: handle binary types using DPipe (||) operator
3947        separator = seq_get(expression.expressions, 0)
3948        args = expression.expressions[1:]
3949
3950        if any(_is_binary(arg) for arg in [separator, *args]):
3951            result = args[0]
3952            for arg in args[1:]:
3953                result = exp.DPipe(
3954                    this=exp.DPipe(this=result, expression=separator), expression=arg
3955                )
3956            return self.sql(result)
3957
3958        return super().concatws_sql(expression)
3959
    def _regexp_extract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
        """Shared renderer for REGEXP_EXTRACT / REGEXP_EXTRACT_ALL.

        Emulates `position` via SUBSTRING, `occurrence` via REGEXP_EXTRACT_ALL
        plus array slicing/ARRAY_EXTRACT, strips Snowflake's 'e' flag, and
        validates the remaining regex flags against DuckDB's supported set.
        """
        this = expression.this
        group = expression.args.get("group")
        params = expression.args.get("parameters")
        position = expression.args.get("position")
        occurrence = expression.args.get("occurrence")
        null_if_pos_overflow = expression.args.get("null_if_pos_overflow")

        # Handle Snowflake's 'e' flag: it enables capture group extraction
        # In DuckDB, this is controlled by the group parameter directly
        if params and params.is_string and "e" in params.name:
            params = exp.Literal.string(params.name.replace("e", ""))

        validated_flags = self._validate_regexp_flags(params, supported_flags="cims")

        # Strip default group when no following params (DuckDB default is same as group=0)
        if (
            not validated_flags
            and group
            and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
        ):
            group = None

        flags_expr = exp.Literal.string(validated_flags) if validated_flags else None

        # use substring to handle position argument
        if position and (not position.is_int or position.to_py() > 1):
            this = exp.Substring(this=this, start=position)

            # SUBSTRING past the end yields ''; NULLIF turns that into NULL
            if null_if_pos_overflow:
                this = exp.Nullif(this=this, expression=exp.Literal.string(""))

        is_extract_all = isinstance(expression, exp.RegexpExtractAll)
        non_single_occurrence = occurrence and (not occurrence.is_int or occurrence.to_py() > 1)

        if is_extract_all or non_single_occurrence:
            name = "REGEXP_EXTRACT_ALL"
        else:
            name = "REGEXP_EXTRACT"

        result: exp.Expr = exp.Anonymous(
            this=name, expressions=[this, expression.expression, group, flags_expr]
        )

        # Array slicing for REGEXP_EXTRACT_ALL with occurrence
        if is_extract_all and non_single_occurrence:
            result = exp.Bracket(this=result, expressions=[exp.Slice(this=occurrence)])
        # ARRAY_EXTRACT for REGEXP_EXTRACT with occurrence > 1
        elif non_single_occurrence:
            result = exp.Anonymous(this="ARRAY_EXTRACT", expressions=[result, occurrence])

        return self.sql(result)
4012
    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        # Delegate to the shared REGEXP_EXTRACT[_ALL] renderer.
        return self._regexp_extract_sql(expression)
4015
    def regexpextractall_sql(self, expression: exp.RegexpExtractAll) -> str:
        # Delegate to the shared REGEXP_EXTRACT[_ALL] renderer.
        return self._regexp_extract_sql(expression)
4018
    def regexpinstr_sql(self, expression: exp.RegexpInstr) -> str:
        """Emulate REGEXP_INSTR, which DuckDB lacks, via split/match length sums.

        The position of the Nth match is computed as
        1 + sum(lengths of the first N split pieces)
          + sum(lengths of the first N-1 matches) + position offset.
        `option=1` adds the Nth match's length (end position). Returns NULL if
        any provided argument is NULL, and 0 for an empty pattern or when
        fewer than `occurrence` matches exist.
        """
        this = expression.this
        pattern = expression.expression
        position = expression.args.get("position")
        orig_occ = expression.args.get("occurrence")
        occurrence = orig_occ or exp.Literal.number(1)
        option = expression.args.get("option")
        parameters = expression.args.get("parameters")

        # Supported flags are folded into the pattern as an inline (?ims) group
        validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims")
        if validated_flags:
            pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern])

        # Handle starting position offset
        pos_offset: exp.Expr = exp.Literal.number(0)
        if position and (not position.is_int or position.to_py() > 1):
            this = exp.Substring(this=this, start=position)
            pos_offset = position - exp.Literal.number(1)

        # Helper: LIST_SUM(LIST_TRANSFORM(list[1:end], x -> LENGTH(x)))
        def sum_lengths(func_name: str, end: exp.Expr) -> exp.Expr:
            lst = exp.Bracket(
                this=exp.Anonymous(this=func_name, expressions=[this, pattern]),
                expressions=[exp.Slice(this=exp.Literal.number(1), expression=end)],
                offset=1,
            )
            transform = exp.Anonymous(
                this="LIST_TRANSFORM",
                expressions=[
                    lst,
                    exp.Lambda(
                        this=exp.Length(this=exp.to_identifier("x")),
                        expressions=[exp.to_identifier("x")],
                    ),
                ],
            )
            # COALESCE(..., 0) covers LIST_SUM over an empty slice
            return exp.Coalesce(
                this=exp.Anonymous(this="LIST_SUM", expressions=[transform]),
                expressions=[exp.Literal.number(0)],
            )

        # Position = 1 + sum(split_lengths[1:occ]) + sum(match_lengths[1:occ-1]) + offset
        base_pos: exp.Expr = (
            exp.Literal.number(1)
            + sum_lengths("STRING_SPLIT_REGEX", occurrence)
            + sum_lengths("REGEXP_EXTRACT_ALL", occurrence - exp.Literal.number(1))
            + pos_offset
        )

        # option=1: add match length for end position
        if option and option.is_int and option.to_py() == 1:
            match_at_occ = exp.Bracket(
                this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern]),
                expressions=[occurrence],
                offset=1,
            )
            base_pos = base_pos + exp.Coalesce(
                this=exp.Length(this=match_at_occ), expressions=[exp.Literal.number(0)]
            )

        # NULL checks for all provided arguments
        # .copy() is used strictly because .is_() alters the node's parent pointer, mutating the parsed AST
        null_args = [
            expression.this,
            expression.expression,
            position,
            orig_occ,
            option,
            parameters,
        ]
        null_checks = [arg.copy().is_(exp.Null()) for arg in null_args if arg]

        matches = exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern])

        return self.sql(
            exp.case()
            .when(exp.or_(*null_checks), exp.Null())
            .when(pattern.copy().eq(exp.Literal.string("")), exp.Literal.number(0))
            .when(exp.Length(this=matches) < occurrence, exp.Literal.number(0))
            .else_(base_pos)
        )
4100
    @unsupported_args("culture")
    def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
        """Render TO_CHAR-style number formatting via DuckDB's FORMAT.

        Only integer precision formats are supported: they become a
        thousands-separated fixed-point template '{:,.Nf}'. Anything else
        falls back to generic function rendering with a warning.
        """
        fmt = expression.args.get("format")
        if fmt and fmt.is_int:
            return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

        self.unsupported("Only integer formats are supported by NumberToStr")
        return self.function_fallback_sql(expression)
4109
    def autoincrementcolumnconstraint_sql(self, _) -> str:
        # Drop the constraint entirely; DuckDB has no AUTOINCREMENT.
        self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
        return ""
4113
4114    def aliases_sql(self, expression: exp.Aliases) -> str:
4115        this = expression.this
4116        if isinstance(this, exp.Posexplode):
4117            return self.posexplode_sql(this)
4118
4119        return super().aliases_sql(expression)
4120
    def posexplode_sql(self, expression: exp.Posexplode) -> str:
        """Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS - 1.

        Alias names come from the enclosing Aliases/Table node when present,
        defaulting to Spark's ("pos", "col"). In FROM position the pair is
        wrapped in a subquery so it is a valid table source.
        """
        this = expression.this
        parent = expression.parent

        # The default Spark aliases are "pos" and "col", unless specified otherwise
        pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

        if isinstance(parent, exp.Aliases):
            # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
            pos, col = parent.expressions
        elif isinstance(parent, exp.Table):
            # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
            alias = parent.args.get("alias")
            if alias:
                pos, col = alias.columns or [pos, col]
                # Remove the table alias so it isn't rendered twice
                alias.pop()

        # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
        # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
        unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
        gen_subscripts = self.sql(
            exp.Alias(
                this=exp.Anonymous(
                    this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                )
                - exp.Literal.number(1),
                alias=pos,
            )
        )

        posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

        if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
            # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
            return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

        return posexplode_sql
4158
    def addmonths_sql(self, expression: exp.AddMonths) -> str:
        """
        Generate DuckDB SQL for ADD_MONTHS-style month arithmetic.

        Handles three key issues:
        1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
        2. End-of-month preservation: If input is last day of month, result is last day of result month
        3. Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
        """
        # Imported lazily to avoid a circular import at module load time.
        from sqlglot.optimizer.annotate_types import annotate_types

        this = expression.this
        # Only annotate when the node has no type yet; annotation is needed to
        # decide on the TEXT coercion and the final output cast below.
        if not this.type:
            this = annotate_types(this, dialect=self.dialect)

        # Textual dates must first be coerced so the arithmetic below is valid.
        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Cast(this=this, to=exp.DataType(this=exp.DType.TIMESTAMP))

        # Detect float/decimal months to apply rounding (Snowflake behavior)
        # DuckDB INTERVAL syntax doesn't support non-integer expressions, so use TO_MONTHS
        months_expr = expression.expression
        if not months_expr.type:
            months_expr = annotate_types(months_expr, dialect=self.dialect)

        # Build interval or to_months expression based on type
        # Float/decimal case: Round and use TO_MONTHS(CAST(ROUND(value) AS INT))
        interval_or_to_months = (
            exp.func("TO_MONTHS", exp.cast(exp.func("ROUND", months_expr), "INT"))
            if months_expr.is_type(
                exp.DType.FLOAT,
                exp.DType.DOUBLE,
                exp.DType.DECIMAL,
            )
            # Integer case: standard INTERVAL N MONTH syntax
            else exp.Interval(this=months_expr, unit=exp.var("MONTH"))
        )

        date_add_expr = exp.Add(this=this, expression=interval_or_to_months)

        # Apply end-of-month preservation if Snowflake flag is set
        # CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(result) ELSE result END
        preserve_eom = expression.args.get("preserve_end_of_month")
        result_expr = (
            exp.case()
            .when(
                exp.EQ(this=exp.func("LAST_DAY", this), expression=this),
                exp.func("LAST_DAY", date_add_expr),
            )
            .else_(date_add_expr)
            if preserve_eom
            else date_add_expr
        )

        # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
        # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
        # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
        # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
        if this.is_type(exp.DType.DATE, exp.DType.TIMESTAMPTZ):
            return self.sql(exp.Cast(this=result_expr, to=this.type))
        return self.sql(result_expr)
4217
4218    def format_sql(self, expression: exp.Format) -> str:
4219        if expression.name.lower() == "%s" and len(expression.expressions) == 1:
4220            return self.func("FORMAT", "'{}'", expression.expressions[0])
4221
4222        return self.function_fallback_sql(expression)
4223
4224    def hexstring_sql(
4225        self, expression: exp.HexString, binary_function_repr: str | None = None
4226    ) -> str:
4227        # UNHEX('FF') correctly produces blob \xFF in DuckDB
4228        return super().hexstring_sql(expression, binary_function_repr="UNHEX")
4229
    def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
        """
        Generate DATE_TRUNC for DuckDB, with special handling for week units
        that carry a custom start-of-week day, and optional input-type
        preservation via an output cast.
        """
        unit = expression.args.get("unit")
        date = expression.this

        # NOTE: week_start must be derived from the raw unit node BEFORE
        # `unit` is rebound to its string form on the next line.
        week_start = _week_unit_to_dow(unit)
        unit = unit_to_str(expression)

        if week_start:
            # Custom week boundary: build the dedicated week-truncation tree.
            result = self.sql(
                _build_week_trunc_expression(date, week_start, preserve_start_day=True)
            )
        else:
            result = self.func("DATE_TRUNC", unit, date)

        # Re-cast the result to the input's temporal type when asked to,
        # except when truncating a DATE by a date-sized unit (already a DATE).
        if (
            expression.args.get("input_type_preserved")
            and date.is_type(*exp.DataType.TEMPORAL_TYPES)
            and not (is_date_unit(unit) and date.is_type(exp.DType.DATE))
        ):
            return self.sql(exp.Cast(this=result, to=date.type))

        return result
4252
    def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
        """
        Generate DATE_TRUNC for timestamps, handling timezone-aware truncation
        (BigQuery semantics) and optional input-type preservation.
        """
        unit = unit_to_str(expression)
        zone = expression.args.get("zone")
        timestamp = expression.this
        date_unit = is_date_unit(unit)

        if date_unit and zone:
            # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC.
            # Double AT TIME ZONE needed for BigQuery compatibility:
            # 1. First AT TIME ZONE: ensures truncation happens in the target timezone
            # 2. Second AT TIME ZONE: converts the DATE result back to TIMESTAMPTZ (preserving time component)
            timestamp = exp.AtTimeZone(this=timestamp, zone=zone)
            result_sql = self.func("DATE_TRUNC", unit, timestamp)
            return self.sql(exp.AtTimeZone(this=result_sql, zone=zone))

        result = self.func("DATE_TRUNC", unit, timestamp)
        if expression.args.get("input_type_preserved"):
            # TIME inputs: DuckDB's DATE_TRUNC needs a date component, so
            # anchor the TIME to a dummy epoch date, truncate, and cast back.
            if timestamp.type and timestamp.is_type(exp.DType.TIME, exp.DType.TIMETZ):
                dummy_date = exp.Cast(
                    this=exp.Literal.string("1970-01-01"),
                    to=exp.DataType(this=exp.DType.DATE),
                )
                date_time = exp.Add(this=dummy_date, expression=timestamp)
                result = self.func("DATE_TRUNC", unit, date_time)
                return self.sql(exp.Cast(this=result, to=timestamp.type))

            # Other temporal inputs: cast back unless truncating a DATE by a
            # date-sized unit (result is already the right type).
            if timestamp.is_type(*exp.DataType.TEMPORAL_TYPES) and not (
                date_unit and timestamp.is_type(exp.DType.DATE)
            ):
                return self.sql(exp.Cast(this=result, to=timestamp.type))

        return result
4285
    def trim_sql(self, expression: exp.Trim) -> str:
        """
        Generate TRIM, casting operands to VARCHAR first and, where required,
        casting the result back to BLOB via _gen_with_cast_to_blob.
        """
        # NOTE: mutates the tree in place — both the trimmed value and the
        # optional trim-characters argument are replaced with VARCHAR casts
        # before delegating to the base generator.
        expression.this.replace(_cast_to_varchar(expression.this))
        if expression.expression:
            expression.expression.replace(_cast_to_varchar(expression.expression))

        result_sql = super().trim_sql(expression)
        return _gen_with_cast_to_blob(self, expression, result_sql)
4293
4294    def round_sql(self, expression: exp.Round) -> str:
4295        this = expression.this
4296        decimals = expression.args.get("decimals")
4297        truncate = expression.args.get("truncate")
4298
4299        # DuckDB requires the scale (decimals) argument to be an INT
4300        # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally
4301        if decimals is not None and expression.args.get("casts_non_integer_decimals"):
4302            if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
4303                decimals = exp.cast(decimals, exp.DType.INT)
4304
4305        func = "ROUND"
4306        if truncate:
4307            # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN
4308            if truncate.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"):
4309                func = "ROUND_EVEN"
4310                truncate = None
4311            # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO
4312            elif truncate.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"):
4313                truncate = None
4314
4315        return self.func(func, this, decimals, truncate)
4316
    def strtok_sql(self, expression: exp.Strtok) -> str:
        """
        Emulate STRTOK(string, delimiter, part_index) in DuckDB by splitting on
        a runtime-escaped character class, filtering empty tokens, and indexing
        the resulting array (1-based).
        """
        string_arg = expression.this
        delimiter_arg = expression.args.get("delimiter")
        part_index_arg = expression.args.get("part_index")

        if delimiter_arg and part_index_arg:
            # Escape regex chars and build character class at runtime using REGEXP_REPLACE
            escaped_delimiter = exp.Anonymous(
                this="REGEXP_REPLACE",
                expressions=[
                    delimiter_arg,
                    exp.Literal.string(
                        r"([\[\]^.\-*+?(){}|$\\])"
                    ),  # Escape problematic regex chars
                    exp.Literal.string(
                        r"\\\1"
                    ),  # Replace with escaped version using $1 backreference
                    exp.Literal.string("g"),  # Global flag
                ],
            )
            # CASE WHEN delimiter = '' THEN '' ELSE CONCAT('[', escaped_delimiter, ']') END
            regex_pattern = (
                exp.case()
                .when(delimiter_arg.eq(exp.Literal.string("")), exp.Literal.string(""))
                .else_(
                    exp.func(
                        "CONCAT",
                        exp.Literal.string("["),
                        escaped_delimiter,
                        exp.Literal.string("]"),
                    )
                )
            )

            # STRTOK skips empty strings, so we need to filter them out
            # LIST_FILTER(REGEXP_SPLIT_TO_ARRAY(string, pattern), x -> x != '')[index]
            split_array = exp.func("REGEXP_SPLIT_TO_ARRAY", string_arg, regex_pattern)
            x = exp.to_identifier("x")
            is_empty = x.eq(exp.Literal.string(""))
            filtered_array = exp.func(
                "LIST_FILTER",
                split_array,
                exp.Lambda(this=exp.not_(is_empty.copy()), expressions=[x.copy()]),
            )
            # offset=1: STRTOK's part_index is 1-based, matching DuckDB lists.
            base_func = exp.Bracket(
                this=filtered_array,
                expressions=[part_index_arg],
                offset=1,
            )

            # Use template with the built regex pattern
            result = exp.replace_placeholders(
                self.STRTOK_TEMPLATE.copy(),
                string=string_arg,
                delimiter=delimiter_arg,
                part_index=part_index_arg,
                base_func=base_func,
            )

            return self.sql(result)

        # Without both delimiter and part_index, render as a plain function call.
        return self.function_fallback_sql(expression)
4379
4380    def strtoktoarray_sql(self, expression: exp.StrtokToArray) -> str:
4381        string_arg = expression.this
4382        delimiter_arg = expression.args.get("expression") or exp.Literal.string(" ")
4383
4384        escaped = exp.RegexpReplace(
4385            this=delimiter_arg.copy(),
4386            expression=exp.Literal.string(r"([\[\]^.\-*+?(){}|$\\])"),
4387            replacement=exp.Literal.string(r"\\\1"),
4388            modifiers=exp.Literal.string("g"),
4389        )
4390        return self.sql(
4391            exp.replace_placeholders(
4392                self.STRTOK_TO_ARRAY_TEMPLATE.copy(),
4393                string=string_arg,
4394                delimiter=delimiter_arg,
4395                escaped=escaped,
4396            )
4397        )
4398
4399    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
4400        result = self.func("APPROX_QUANTILE", expression.this, expression.args.get("quantile"))
4401
4402        # DuckDB returns integers for APPROX_QUANTILE, cast to DOUBLE if the expected type is a real type
4403        if expression.is_type(*exp.DataType.REAL_TYPES):
4404            result = f"CAST({result} AS DOUBLE)"
4405
4406        return result
4407
    def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
        """
        BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values
        dividing the input distribution into n equal-sized buckets.

        Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery
        does not document the specific algorithm used so results may differ. DuckDB does not
        support RESPECT NULLS.
        """
        this = expression.this
        if isinstance(this, exp.Distinct):
            # APPROX_QUANTILES requires 2 args and DISTINCT node grabs both
            if len(this.expressions) < 2:
                self.unsupported("APPROX_QUANTILES requires a bucket count argument")
                return self.function_fallback_sql(expression)
            # pop() detaches the bucket count from the Distinct node so only
            # the value expression remains under DISTINCT.
            num_quantiles_expr = this.expressions[1].pop()
        else:
            num_quantiles_expr = expression.expression

        # The bucket count must be a literal positive integer; anything else
        # cannot be expanded into a static quantile list below.
        if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int:
            self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
            return self.function_fallback_sql(expression)

        num_quantiles = t.cast(int, num_quantiles_expr.to_py())
        if num_quantiles <= 0:
            self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
            return self.function_fallback_sql(expression)

        # Build the n+1 evenly spaced quantile points 0, 1/n, ..., 1.
        # Decimal keeps the fractions exact in the generated SQL.
        quantiles = [
            exp.Literal.number(Decimal(i) / Decimal(num_quantiles))
            for i in range(num_quantiles + 1)
        ]

        return self.sql(exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles)))
4442
4443    def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str:
4444        if expression.args.get("scalar_only"):
4445            expression = exp.JSONExtractScalar(
4446                this=rename_func("JSON_VALUE")(self, expression), expression="'$'"
4447            )
4448        return _arrow_json_extract_sql(self, expression)
4449
    def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str:
        """
        Generate bitwise NOT (~) for DuckDB, casting the operand to BIT and,
        for binary inputs, casting the result back to BLOB.
        """
        this = expression.this

        # Binary operands: mark the node so _gen_with_cast_to_blob restores BLOB.
        if _is_binary(this):
            expression.type = exp.DType.BINARY.into_expr()

        arg = _cast_to_bit(this)

        # Parenthesize a unary minus so ~ binds to the whole negated value.
        if isinstance(this, exp.Neg):
            arg = exp.Paren(this=arg)

        # NOTE: mutates the node in place before rendering its operand.
        expression.set("this", arg)

        result_sql = f"~{self.sql(expression, 'this')}"

        return _gen_with_cast_to_blob(self, expression, result_sql)
4466
4467    def window_sql(self, expression: exp.Window) -> str:
4468        this = expression.this
4469        if isinstance(this, exp.Corr) or (
4470            isinstance(this, exp.Filter) and isinstance(this.this, exp.Corr)
4471        ):
4472            return self._corr_sql(expression)
4473
4474        return super().window_sql(expression)
4475
4476    def filter_sql(self, expression: exp.Filter) -> str:
4477        if isinstance(expression.this, exp.Corr):
4478            return self._corr_sql(expression)
4479
4480        return super().filter_sql(expression)
4481
    def _corr_sql(
        self,
        expression: exp.Filter | exp.Window | exp.Corr,
    ) -> str:
        """
        Generate CORR, optionally mapping a NaN result to NULL (for dialects
        where zero variance yields NULL rather than NaN). Falls back to the
        base window/filter generators when the rewrite helper declines.
        """
        # Plain CORR without the null-on-zero-variance flag needs no wrapping.
        if isinstance(expression, exp.Corr) and not expression.args.get("null_on_zero_variance"):
            return self.func("CORR", expression.this, expression.expression)

        corr_expr = _maybe_corr_null_to_false(expression)
        if corr_expr is None:
            # Helper declined: render via the base class to avoid recursing
            # back into our own window_sql/filter_sql overrides.
            if isinstance(expression, exp.Window):
                return super().window_sql(expression)
            if isinstance(expression, exp.Filter):
                return super().filter_sql(expression)
            corr_expr = expression  # make mypy happy

        # CASE WHEN ISNAN(corr) THEN NULL ELSE corr END
        return self.sql(exp.case().when(exp.IsNan(this=corr_expr), exp.null()).else_(corr_expr))
4498
4499    def uuid_sql(self, expression: exp.Uuid) -> str:
4500        namespace = expression.this
4501        name = expression.args.get("name")
4502
4503        # UUID v5 (namespace + name) - Emulate using SHA1
4504        if namespace and name:
4505            result = exp.replace_placeholders(
4506                self.UUID_V5_TEMPLATE.copy(),
4507                namespace=namespace,
4508                name=name,
4509            )
4510            return self.sql(result)
4511
4512        return super().uuid_sql(expression)

Generator converts a given syntax tree to the corresponding SQL string.

Arguments:
  • pretty: Whether to format the produced SQL string. Default: False.
  • identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for special cases. 'safe': Only quote identifiers that are case insensitive.
  • normalize: Whether to normalize identifiers to lowercase. Default: False.
  • pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
  • indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
  • normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
  • unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
  • max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
  • leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
  • max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
  • comments: Whether to preserve comments in the output SQL code. Default: True
PARAMETER_TOKEN = '$'
NAMED_PLACEHOLDER_TOKEN = '$'
JOIN_HINTS = False
TABLE_HINTS = False
QUERY_HINTS = False
LIMIT_FETCH = 'LIMIT'
STRUCT_DELIMITER = ('(', ')')
RENAME_TABLE_WITH_DB = False
NVL2_SUPPORTED = False
SEMI_ANTI_JOIN_WITH_SIDE = False
TABLESAMPLE_KEYWORDS = 'USING SAMPLE'
TABLESAMPLE_SEED_KEYWORD = 'REPEATABLE'
LAST_DAY_SUPPORTS_DATE_PART = False
JSON_KEY_VALUE_PAIR_SEP = ','
IGNORE_NULLS_IN_FUNC = True
IGNORE_NULLS_BEFORE_ORDER = False
JSON_PATH_BRACKETED_KEY_SUPPORTED = False
SUPPORTS_CREATE_TABLE_LIKE = False
MULTI_ARG_DISTINCT = False
CAN_IMPLEMENT_ARRAY_ANY = True
SUPPORTS_TO_NUMBER = False
SELECT_KINDS: tuple[str, ...] = ()
SUPPORTS_DECODE_CASE = False
SUPPORTS_DROP_ALTER_ICEBERG_PROPERTY = False
AFTER_HAVING_MODIFIER_TRANSFORMS = {'windows': <function <lambda>>, 'qualify': <function <lambda>>}
SUPPORTS_WINDOW_EXCLUDE = True
COPY_HAS_INTO_KEYWORD = False
STAR_EXCEPT = 'EXCLUDE'
PAD_FILL_PATTERN_IS_REQUIRED = True
ARRAY_SIZE_DIM_REQUIRED: bool | None = False
NORMALIZE_EXTRACT_DATE_PARTS = True
SUPPORTS_LIKE_QUANTIFIERS = False
SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True
TRANSFORMS = {<class 'sqlglot.expressions.query.JSONPathKey'>: <function <lambda>>, <class 'sqlglot.expressions.query.JSONPathRoot'>: <function <lambda>>, <class 'sqlglot.expressions.query.JSONPathSubscript'>: <function <lambda>>, <class 'sqlglot.expressions.query.JSONPathWildcard'>: <function <lambda>>, <class 'sqlglot.expressions.core.Adjacent'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.AllowedValuesProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.AnalyzeColumns'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.AnalyzeWith'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.array.ArrayContainsAll'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.array.ArrayOverlaps'>: <function _array_overlaps_sql>, <class 'sqlglot.expressions.constraints.AssumeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.AutoRefreshProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.BackupProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.math.Ceil'>: <function _ceil_floor>, <class 'sqlglot.expressions.constraints.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.ClusteredColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.CommentColumnConstraint'>: <function no_comment_column_constraint_sql>, <class 'sqlglot.expressions.functions.ConnectByRoot'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.string.ConvertToCharset'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.CopyGrantsProperty'>: 
<function Generator.<lambda>>, <class 'sqlglot.expressions.properties.CredentialsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.functions.CurrentCatalog'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.functions.SessionUser'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ApiProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ApplicationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.CatalogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ComputeProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.DatabaseProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.DynamicProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.EmptyProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.EndStatement'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.EnviromentProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.HandlerProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ParameterStyleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.EphemeralColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.ExcludeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.Except'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ExternalProperty'>: 
<function Generator.<lambda>>, <class 'sqlglot.expressions.math.Floor'>: <function _ceil_floor>, <class 'sqlglot.expressions.query.Get'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.GlobalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.HybridProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.IcebergProperty'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.properties.InheritsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.Intersect'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.datatypes.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.functions.Int64'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.json.JSONBContainsAnyTopKeys'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.json.JSONBContainsAllTopKeys'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.json.JSONBDeleteAtPath'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.json.JSONObject'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.json.JSONObjectAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.properties.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.MaskingProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.functions.NetFunc'>: <function 
Generator.<lambda>>, <class 'sqlglot.expressions.properties.NetworkProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.NonClusteredColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.NotForReplicationColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.OnProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.core.Operator'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.core.ExtendsLeft'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.core.ExtendsRight'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.PartitionedByBucket'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.PartitionByTruncate'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.core.PivotAny'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.array.PositionalColumn'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.ProjectionPolicyColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.InvisibleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.ZeroFillColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.Put'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 
'sqlglot.expressions.properties.ReturnsProperty'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.properties.RowAccessProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.core.SafeFunc'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SecureProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SecurityIntegrationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SetConfigProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SharingProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SqlReadWriteProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.Stream'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.StreamingTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.StrictProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ddl.SwapTable'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.TableColumn'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.Tags'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.array.ToMap'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ToTableProperty'>: <function 
Generator.<lambda>>, <class 'sqlglot.expressions.properties.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.VirtualProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ddl.TriggerExecute'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.Union'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.UnloggedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.UsingTemplateProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.UsingData'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.temporal.UtcDate'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.temporal.UtcTime'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.temporal.UtcTimestamp'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.query.Variadic'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.array.VarMap'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ViewAttributeProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.WithProcedureOptions'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.WithSchemaBindingProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.constraints.WithOperator'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.properties.ForceProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.aggregate.AnyValue'>: <function _anyvalue_sql>, <class 'sqlglot.expressions.core.ApproxDistinct'>: <function 
approx_count_distinct_sql>, <class 'sqlglot.expressions.math.Boolnot'>: <function _boolnot_sql>, <class 'sqlglot.expressions.math.Booland'>: <function _booland_sql>, <class 'sqlglot.expressions.math.Boolor'>: <function _boolor_sql>, <class 'sqlglot.expressions.array.Array'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.array.ArrayAppend'>: <function array_append_sql.<locals>._array_append_sql>, <class 'sqlglot.expressions.array.ArrayCompact'>: <function array_compact_sql>, <class 'sqlglot.expressions.array.ArrayConstructCompact'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.array.ArrayConcat'>: <function array_concat_sql.<locals>._array_concat_sql>, <class 'sqlglot.expressions.array.ArrayContains'>: <function _array_contains_sql>, <class 'sqlglot.expressions.array.ArrayFilter'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.array.ArrayInsert'>: <function _array_insert_sql>, <class 'sqlglot.expressions.array.ArrayPosition'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.array.ArrayRemoveAt'>: <function _array_remove_at_sql>, <class 'sqlglot.expressions.array.ArrayRemove'>: <function remove_from_array_using_filter>, <class 'sqlglot.expressions.array.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.array.ArrayPrepend'>: <function array_append_sql.<locals>._array_append_sql>, <class 'sqlglot.expressions.array.ArraySum'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.array.ArrayMax'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.array.ArrayMin'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.string.Base64DecodeBinary'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.Base64DecodeString'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.core.BitwiseAnd'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.math.BitwiseAndAgg'>: <function 
_bitwise_agg_sql>, <class 'sqlglot.expressions.math.BitwiseCount'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.core.BitwiseLeftShift'>: <function _bitshift_sql>, <class 'sqlglot.expressions.core.BitwiseOr'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.math.BitwiseOrAgg'>: <function _bitwise_agg_sql>, <class 'sqlglot.expressions.core.BitwiseRightShift'>: <function _bitshift_sql>, <class 'sqlglot.expressions.math.BitwiseXorAgg'>: <function _bitwise_agg_sql>, <class 'sqlglot.expressions.aggregate.Corr'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.math.CosineDistance'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.CurrentTime'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.CurrentSchemas'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.CurrentTimestamp'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.CurrentVersion'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.Localtime'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.DayOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.DayOfWeek'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.DayOfWeekIso'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.DayOfYear'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.Dayname'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.Monthname'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.datatypes.DataType'>: <function _datatype_sql>, <class 'sqlglot.expressions.temporal.Date'>: <function _date_sql>, <class 'sqlglot.expressions.temporal.DateAdd'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 
'sqlglot.expressions.temporal.DateFromParts'>: <function _date_from_parts_sql>, <class 'sqlglot.expressions.temporal.DateSub'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.temporal.DateStrToDate'>: <function datestrtodate_sql>, <class 'sqlglot.expressions.temporal.Datetime'>: <function no_datetime_sql>, <class 'sqlglot.expressions.temporal.DatetimeDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.temporal.DatetimeSub'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.DatetimeAdd'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.DateToDi'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.Decode'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.HexDecodeString'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.DiToDate'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.Encode'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.EqualNull'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.math.EuclideanDistance'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.GenerateDateArray'>: <function _generate_datetime_array_sql>, <class 'sqlglot.expressions.array.GenerateSeries'>: <function generate_series_sql.<locals>._generate_series_sql>, <class 'sqlglot.expressions.temporal.GenerateTimestampArray'>: <function _generate_datetime_array_sql>, <class 'sqlglot.expressions.math.Getbit'>: <function getbit_sql>, <class 'sqlglot.expressions.aggregate.GroupConcat'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.array.Explode'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.core.IntDiv'>: 
<function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.math.IsInf'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.math.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.functions.IsNullValue'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.IsArray'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.json.JSONBExists'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.json.JSONExtract'>: <function _arrow_json_extract_sql>, <class 'sqlglot.expressions.json.JSONExtractArray'>: <function _json_extract_value_array_sql>, <class 'sqlglot.expressions.json.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.query.JSONValueArray'>: <function _json_extract_value_array_sql>, <class 'sqlglot.expressions.query.Lateral'>: <function _explode_to_unnest_sql>, <class 'sqlglot.expressions.aggregate.LogicalOr'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.aggregate.LogicalAnd'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.query.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.functions.Seq1'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.Seq2'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.Seq4'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.Seq8'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.math.BoolxorAgg'>: <function _boolxor_agg_sql>, <class 'sqlglot.expressions.temporal.MakeInterval'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.Initcap'>: <function _initcap_sql>, <class 'sqlglot.expressions.string.MD5Digest'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.SHA'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.SHA1Digest'>: <function DuckDBGenerator.<lambda>>, <class 
'sqlglot.expressions.string.SHA2'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.SHA2Digest'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.MonthsBetween'>: <function months_between_sql>, <class 'sqlglot.expressions.temporal.NextDay'>: <function _day_navigation_sql>, <class 'sqlglot.expressions.aggregate.PercentileCont'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.aggregate.PercentileDisc'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.query.Pivot'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.temporal.PreviousDay'>: <function _day_navigation_sql>, <class 'sqlglot.expressions.string.RegexpILike'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.string.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.aggregate.RegrValx'>: <function _regr_val_sql>, <class 'sqlglot.expressions.aggregate.RegrValy'>: <function _regr_val_sql>, <class 'sqlglot.expressions.query.Return'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.StrToUnix'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.array.Struct'>: <function _struct_sql>, <class 'sqlglot.expressions.array.Transform'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.TimeAdd'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.TimeSub'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.Time'>: <function no_time_sql>, <class 'sqlglot.expressions.temporal.TimeDiff'>: <function _timediff_sql>, <class 'sqlglot.expressions.temporal.Timestamp'>: <function no_timestamp_sql>, <class 'sqlglot.expressions.temporal.TimestampAdd'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 
'sqlglot.expressions.temporal.TimestampDiff'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.TimestampSub'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.TimeStrToDate'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.temporal.TimeStrToUnix'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.TimeToStr'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.functions.ToBoolean'>: <function _to_boolean_sql>, <class 'sqlglot.expressions.functions.ToVariant'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.TsOrDiToDi'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.TsOrDsAdd'>: <function _date_delta_to_binary_interval_op.<locals>._duckdb_date_delta_sql>, <class 'sqlglot.expressions.temporal.TsOrDsDiff'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.UnixMicros'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.UnixMillis'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.UnixSeconds'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.UnixToStr'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.DatetimeTrunc'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.UnixToTime'>: <function _unix_to_time_sql>, <class 'sqlglot.expressions.temporal.UnixToTimeStr'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.aggregate.VariancePop'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.WeekOfYear'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.YearOfWeek'>: <function 
DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.temporal.YearOfWeekIso'>: <function DuckDBGenerator.<lambda>>, <class 'sqlglot.expressions.core.Xor'>: <function _xor_sql>, <class 'sqlglot.expressions.json.JSONBObjectAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.DateBin'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.temporal.LastDay'>: <function _last_day_sql>}
TYPE_MAPPING = {<DType.DATETIME2: 'DATETIME2'>: 'TIMESTAMP', <DType.NCHAR: 'NCHAR'>: 'TEXT', <DType.NVARCHAR: 'NVARCHAR'>: 'TEXT', <DType.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <DType.LONGTEXT: 'LONGTEXT'>: 'TEXT', <DType.TINYTEXT: 'TINYTEXT'>: 'TEXT', <DType.BLOB: 'BLOB'>: 'VARBINARY', <DType.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <DType.LONGBLOB: 'LONGBLOB'>: 'BLOB', <DType.TINYBLOB: 'TINYBLOB'>: 'BLOB', <DType.INET: 'INET'>: 'INET', <DType.ROWVERSION: 'ROWVERSION'>: 'BLOB', <DType.SMALLDATETIME: 'SMALLDATETIME'>: 'TIMESTAMP', <DType.BINARY: 'BINARY'>: 'BLOB', <DType.BPCHAR: 'BPCHAR'>: 'TEXT', <DType.CHAR: 'CHAR'>: 'TEXT', <DType.DATETIME: 'DATETIME'>: 'TIMESTAMP', <DType.DECFLOAT: 'DECFLOAT'>: 'DECIMAL', <DType.FLOAT: 'FLOAT'>: 'REAL', <DType.JSONB: 'JSONB'>: 'JSON', <DType.UINT: 'UINT'>: 'UINTEGER', <DType.VARBINARY: 'VARBINARY'>: 'BLOB', <DType.VARCHAR: 'VARCHAR'>: 'TEXT', <DType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>: 'TIMESTAMPTZ', <DType.TIMESTAMPNTZ: 'TIMESTAMPNTZ'>: 'TIMESTAMP', <DType.TIMESTAMP_S: 'TIMESTAMP_S'>: 'TIMESTAMP_S', <DType.TIMESTAMP_MS: 'TIMESTAMP_MS'>: 'TIMESTAMP_MS', <DType.TIMESTAMP_NS: 'TIMESTAMP_NS'>: 'TIMESTAMP_NS', <DType.BIGDECIMAL: 'BIGDECIMAL'>: 'DECIMAL'}
TYPE_PARAM_SETTINGS = {<DType.BIGDECIMAL: 'BIGDECIMAL'>: ((38, 5), (38, 38)), <DType.DECFLOAT: 'DECFLOAT'>: ((38, 5), (38, 38))}
RESERVED_KEYWORDS = {'table', 'with', 'limit', 'all', 'array', 'collate', 'check_p', 'references', 'as', 'asymmetric', 'current_timestamp', 'localtime', 'localtimestamp', 'having', 'true_p', 'or', 'except', 'fetch', 'some', 'only', 'column', 'current_role', 'into', 'else', 'analyse', 'analyze', 'then', 'offset', 'current_user', 'in_p', 'grant', 'distinct', 'to', 'cast', 'union', 'initially', 'where', 'variadic', 'user', 'trailing', 'session_user', 'default', 'asc_p', 'constraint', 'end_p', 'from', 'current_time', 'null_p', 'desc_p', 'both', 'unique', 'leading', 'any', 'group_p', 'window', 'current_catalog', 'intersect', 'not', 'current_date', 'on', 'placing', 'order', 'do', 'false_p', 'deferrable', 'lateral_p', 'primary', 'case', 'when', 'for', 'symmetric', 'returning', 'and', 'create_p', 'using', 'foreign', 'select'}
UNWRAPPED_INTERVAL_VALUES = (<class 'sqlglot.expressions.core.Literal'>, <class 'sqlglot.expressions.core.Paren'>)
PROPERTIES_LOCATION = {<class 'sqlglot.expressions.properties.AllowedValuesProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.AlgorithmProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ApiProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ApplicationProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.AutoIncrementProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.AutoRefreshProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.BackupProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.BlockCompressionProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.CatalogProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.CharacterSetProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ChecksumProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.CollateProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ComputeProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.CopyGrantsProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.query.Cluster'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ClusteredByProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DistributedByProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DuplicateKeyProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 
'sqlglot.expressions.properties.DataBlocksizeProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DatabaseProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DataDeletionProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DefinerProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DictRange'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DictProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DynamicProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DistKeyProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.DistStyleProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.EmptyProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.EncodeProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.EngineProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.EnviromentProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.HandlerProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ParameterStyleProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ExecuteAsProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ExternalProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.FallbackProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.FileFormatProperty'>: 
<PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.FreespaceProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.GlobalProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.HeapProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.HybridProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.InheritsProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.IcebergProperty'>: <PropertiesLocation.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.properties.IncludeProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.InputModelProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.IsolatedLoadingProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.JournalProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.LanguageProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.LikeProperty'>: <PropertiesLocation.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.properties.LocationProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.LockProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.LockingProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.LogProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.MaskingProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.MaterializedProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 
'sqlglot.expressions.properties.MergeBlockRatioProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ModuleProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.NetworkProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.NoPrimaryIndexProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.OnProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.OnCommitProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.query.Order'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.OutputModelProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.PartitionedByProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.PartitionedOfProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.constraints.PrimaryKey'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.Property'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.RefreshTriggerProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.RemoteWithConnectionModelProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ReturnsProperty'>: <PropertiesLocation.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.properties.RollupProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.RowAccessProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.RowFormatProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 
'sqlglot.expressions.properties.RowFormatDelimitedProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.RowFormatSerdeProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SampleProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SchemaCommentProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SecureProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SecurityIntegrationProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SerdeProperties'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.ddl.Set'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SettingsProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SetProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SetConfigProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SharingProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.ddl.SequenceProperties'>: <PropertiesLocation.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.ddl.TriggerProperties'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SortKeyProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SqlReadWriteProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.SqlSecurityProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.StabilityProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 
'sqlglot.expressions.properties.StorageHandlerProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.StreamingTableProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.StrictProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.Tags'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.TemporaryProperty'>: <PropertiesLocation.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.properties.ToTableProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.TransientProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.TransformModelProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.ddl.MergeTreeTTL'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.UnloggedProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.UsingProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.UsingTemplateProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ViewAttributeProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.VirtualProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.VolatileProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.WithDataProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.WithJournalTableProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.WithProcedureOptions'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 
'sqlglot.expressions.properties.WithSchemaBindingProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.WithSystemVersioningProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.properties.ForceProperty'>: <PropertiesLocation.UNSUPPORTED: 'UNSUPPORTED'>}
ZIPF_TEMPLATE: sqlglot.expressions.core.Expr = Select( expressions=[ Min( this=Column( this=Identifier(this=i, quoted=False)))], from_=From( this=Table( this=Identifier(this=cdf, quoted=False))), where=Where( this=GTE( this=Column( this=Identifier(this=p, quoted=False)), expression=Subquery( this=Select( expressions=[ Column( this=Identifier(this=r, quoted=False))], from_=From( this=Table( this=Identifier(this=rand, quoted=False))))))), with_=With( expressions=[ CTE( this=Select( expressions=[ Alias( this=Placeholder(this=random_expr), alias=Identifier(this=r, quoted=False))]), alias=TableAlias( this=Identifier(this=rand, quoted=False))), CTE( this=Select( expressions=[ Column( this=Identifier(this=i, quoted=False)), Alias( this=Div( this=Literal(this=1.0, is_string=False), expression=Pow( this=Column( this=Identifier(this=i, quoted=False)), expression=Placeholder(this=s)), typed=False, safe=False), alias=Identifier(this=w, quoted=False))], from_=From( this=Table( this=Anonymous( this=RANGE, expressions=[ Literal(this=1, is_string=False), Add( this=Placeholder(this=n), expression=Literal(this=1, is_string=False))]), alias=TableAlias( this=Identifier(this=t, quoted=False), columns=[ Identifier(this=i, quoted=False)])))), alias=TableAlias( this=Identifier(this=weights, quoted=False))), CTE( this=Select( expressions=[ Column( this=Identifier(this=i, quoted=False)), Alias( this=Div( this=Window( this=Sum( this=Column( this=Identifier(this=w, quoted=False))), order=Order( expressions=[ Ordered( this=Column( this=Identifier(this=i, quoted=False)), nulls_first=True)]), over=OVER), expression=Window( this=Sum( this=Column( this=Identifier(this=w, quoted=False))), over=OVER), typed=False, safe=False), alias=Identifier(this=p, quoted=False))], from_=From( this=Table( this=Identifier(this=weights, quoted=False)))), alias=TableAlias( this=Identifier(this=cdf, quoted=False)))]))
NORMAL_TEMPLATE: sqlglot.expressions.core.Expr = Add( this=Placeholder(this=mean), expression=Paren( this=Mul( this=Mul( this=Placeholder(this=stddev), expression=Sqrt( this=Mul( this=Neg( this=Literal(this=2, is_string=False)), expression=Ln( this=Greatest( this=Placeholder(this=u1), expressions=[ Literal(this=1e-10, is_string=False)], ignore_nulls=True))))), expression=Cos( this=Mul( this=Mul( this=Literal(this=2, is_string=False), expression=Pi()), expression=Placeholder(this=u2))))))
SEEDED_RANDOM_TEMPLATE: sqlglot.expressions.core.Expr = Div( this=Paren( this=Mod( this=Abs( this=Anonymous( this=HASH, expressions=[ Placeholder(this=seed)])), expression=Literal(this=1000000, is_string=False))), expression=Literal(this=1000000.0, is_string=False), typed=False, safe=False)
SEQ_UNSIGNED: sqlglot.expressions.core.Expr = Mod( this=Placeholder(this=base), expression=Placeholder(this=max_val))
SEQ_SIGNED: sqlglot.expressions.core.Expr = Paren( this=Case( ifs=[ If( this=GTE( this=Mod( this=Placeholder(this=base), expression=Placeholder(this=max_val)), expression=Placeholder(this=half)), true=Sub( this=Mod( this=Placeholder(this=base), expression=Placeholder(this=max_val)), expression=Placeholder(this=max_val)))], default=Mod( this=Placeholder(this=base), expression=Placeholder(this=max_val))))
MAPCAT_TEMPLATE: sqlglot.expressions.core.Expr = Case( ifs=[ If( this=Or( this=Is( this=Placeholder(this=map1), expression=Null()), expression=Is( this=Placeholder(this=map2), expression=Null())), true=Null())], default=MapFromEntries( this=Anonymous( this=LIST_FILTER, expressions=[ Anonymous( this=LIST_TRANSFORM, expressions=[ Anonymous( this=LIST_DISTINCT, expressions=[ Anonymous( this=LIST_CONCAT, expressions=[ MapKeys( this=Placeholder(this=map1)), MapKeys( this=Placeholder(this=map2))])]), Lambda( this=Anonymous( this=STRUCT_PACK, expressions=[ PropertyEQ( this=Identifier(this=key, quoted=False), expression=Identifier(this=__k, quoted=False)), PropertyEQ( this=Identifier(this=value, quoted=False), expression=Coalesce( this=Bracket( this=Placeholder(this=map2), expressions=[ Identifier(this=__k, quoted=False)]), expressions=[ Bracket( this=Placeholder(this=map1), expressions=[ Identifier(this=__k, quoted=False)])]))]), expressions=[ Identifier(this=__k, quoted=False)])]), Lambda( this=Not( this=Is( this=Dot( this=Identifier(this=__x, quoted=False), expression=Identifier(this=value, quoted=False)), expression=Null())), expressions=[ Identifier(this=__x, quoted=False)])])))
EXTRACT_STRFTIME_MAPPINGS: dict[str, tuple[str, str]] = {'WEEKISO': ('%V', 'INTEGER'), 'YEAROFWEEK': ('%G', 'INTEGER'), 'YEAROFWEEKISO': ('%G', 'INTEGER'), 'NANOSECOND': ('%n', 'BIGINT')}
EXTRACT_EPOCH_MAPPINGS: dict[str, str] = {'EPOCH_SECOND': 'EPOCH', 'EPOCH_MILLISECOND': 'EPOCH_MS', 'EPOCH_MICROSECOND': 'EPOCH_US', 'EPOCH_NANOSECOND': 'EPOCH_NS'}
BITMAP_CONSTRUCT_AGG_TEMPLATE: sqlglot.expressions.core.Expr = Select( expressions=[ Case( ifs=[ If( this=Or( this=Is( this=Column( this=Identifier(this=l, quoted=False)), expression=Null()), expression=EQ( this=Length( this=Column( this=Identifier(this=l, quoted=False))), expression=Literal(this=0, is_string=False))), true=Null()), If( this=NEQ( this=Length( this=Column( this=Identifier(this=l, quoted=False))), expression=Length( this=Anonymous( this=LIST_FILTER, expressions=[ Column( this=Identifier(this=l, quoted=False)), Lambda( this=Between( this=Identifier(this=__v, quoted=False), low=Literal(this=0, is_string=False), high=Literal(this=32767, is_string=False)), expressions=[ Identifier(this=__v, quoted=False)])]))), true=Null()), If( this=LT( this=Length( this=Column( this=Identifier(this=l, quoted=False))), expression=Literal(this=5, is_string=False)), true=Unhex( this=DPipe( this=DPipe( this=Anonymous( this=PRINTF, expressions=[ Literal(this='%04X', is_string=True), Length( this=Column( this=Identifier(this=l, quoted=False)))]), expression=Column( this=Identifier(this=h, quoted=False)), safe=True), expression=Repeat( this=Literal(this='00', is_string=True), times=Mul( this=Greatest( this=Literal(this=0, is_string=False), expressions=[ Sub( this=Literal(this=4, is_string=False), expression=Length( this=Column( this=Identifier(this=l, quoted=False))))], ignore_nulls=True), expression=Literal(this=2, is_string=False))), safe=True)))], default=Unhex( this=DPipe( this=Literal(this='08000000000000000000', is_string=True), expression=Column( this=Identifier(this=h, quoted=False)), safe=True)))], from_=From( this=Subquery( this=Select( expressions=[ Column( this=Identifier(this=l, quoted=False)), Alias( this=Coalesce( this=Anonymous( this=LIST_REDUCE, expressions=[ Anonymous( this=LIST_TRANSFORM, expressions=[ Column( this=Identifier(this=l, quoted=False)), Lambda( this=Anonymous( this=PRINTF, expressions=[ Literal(this='%02X%02X', is_string=True), BitwiseAnd( 
this=Cast( this=Identifier(this=__x, quoted=False), to=DataType(this=DType.INT, nested=False), _type=DataType(this=DType.INT, nested=False)), expression=Literal(this=255, is_string=False)), BitwiseAnd( this=Paren( this=BitwiseRightShift( this=Cast( this=Identifier(this=__x, quoted=False), to=DataType(this=DType.INT, nested=False), _type=DataType(this=DType.INT, nested=False)), expression=Literal(this=8, is_string=False))), expression=Literal(this=255, is_string=False))]), expressions=[ Identifier(this=__x, quoted=False)])]), Lambda( this=DPipe( this=Identifier(this=__a, quoted=False), expression=Identifier(this=__b, quoted=False), safe=True), expressions=[ Identifier(this=__a, quoted=False), Identifier(this=__b, quoted=False)]), Literal(this='', is_string=True)]), expressions=[ Literal(this='', is_string=True)]), alias=Identifier(this=h, quoted=False))], from_=From( this=Subquery( this=Select( expressions=[ Alias( this=Anonymous( this=LIST_SORT, expressions=[ Anonymous( this=LIST_DISTINCT, expressions=[ Filter( this=List( expressions=[ Placeholder(this=arg)]), expression=Where( this=Not( this=Is( this=Placeholder(this=arg), expression=Null()))))])]), alias=Identifier(this=l, quoted=False))])))))))
RANDSTR_TEMPLATE: sqlglot.expressions.core.Expr = Select( expressions=[ Anonymous( this=LISTAGG, expressions=[ Substring( this=Literal(this='0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', is_string=True), start=Add( this=Literal(this=1, is_string=False), expression=Cast( this=Floor( this=Mul( this=Column( this=Identifier(this=random_value, quoted=False)), expression=Literal(this=62, is_string=False))), to=DataType(this=DType.INT, nested=False), _type=DataType(this=DType.INT, nested=False))), length=Literal(this=1, is_string=False)), Literal(this='', is_string=True)])], from_=From( this=Subquery( this=Select( expressions=[ Alias( this=Div( this=Paren( this=Mod( this=Abs( this=Anonymous( this=HASH, expressions=[ Add( this=Column( this=Identifier(this=i, quoted=False)), expression=Placeholder(this=seed))])), expression=Literal(this=1000, is_string=False))), expression=Literal(this=1000.0, is_string=False), typed=False, safe=False), alias=Identifier(this=random_value, quoted=False))], from_=From( this=Table( this=Anonymous( this=RANGE, expressions=[ Placeholder(this=length)]), alias=TableAlias( this=Identifier(this=t, quoted=False), columns=[ Identifier(this=i, quoted=False)])))))))
MINHASH_TEMPLATE: sqlglot.expressions.core.Expr = Select( expressions=[ JSONObject( expressions=[ JSONKeyValue( this=Literal(this='state', is_string=True), expression=List( expressions=[ Order( this=Column( this=Identifier(this=min_h, quoted=False)), expressions=[ Ordered( this=Column( this=Identifier(this=seed, quoted=False)), nulls_first=True)])])), JSONKeyValue( this=Literal(this='type', is_string=True), expression=Literal(this='minhash', is_string=True)), JSONKeyValue( this=Literal(this='version', is_string=True), expression=Literal(this=1, is_string=False))], return_type=False, encoding=False)], from_=From( this=Subquery( this=Select( expressions=[ Column( this=Identifier(this=seed, quoted=False)), Alias( this=Anonymous( this=LIST_MIN, expressions=[ Anonymous( this=LIST_TRANSFORM, expressions=[ Column( this=Identifier(this=vals, quoted=False)), Lambda( this=Anonymous( this=HASH, expressions=[ DPipe( this=Cast( this=Identifier(this=__v, quoted=False), to=DataType(this=DType.VARCHAR, nested=False), _type=DataType(this=DType.VARCHAR, nested=False)), expression=Cast( this=Column( this=Identifier(this=seed, quoted=False)), to=DataType(this=DType.VARCHAR, nested=False), _type=DataType(this=DType.VARCHAR, nested=False)), safe=True)]), expressions=[ Identifier(this=__v, quoted=False)])])]), alias=Identifier(this=min_h, quoted=False))], from_=From( this=Subquery( this=Select( expressions=[ Alias( this=List( expressions=[ Placeholder(this=expr)]), alias=Identifier(this=vals, quoted=False))]))), joins=[ Join( this=Table( this=Anonymous( this=RANGE, expressions=[ Literal(this=0, is_string=False), Placeholder(this=k)]), alias=TableAlias( this=Identifier(this=t, quoted=False), columns=[ Identifier(this=seed, quoted=False)])))]))))
MINHASH_COMBINE_TEMPLATE: sqlglot.expressions.core.Expr = Select( expressions=[ JSONObject( expressions=[ JSONKeyValue( this=Literal(this='state', is_string=True), expression=List( expressions=[ Order( this=Column( this=Identifier(this=min_h, quoted=False)), expressions=[ Ordered( this=Column( this=Identifier(this=idx, quoted=False)), nulls_first=True)])])), JSONKeyValue( this=Literal(this='type', is_string=True), expression=Literal(this='minhash', is_string=True)), JSONKeyValue( this=Literal(this='version', is_string=True), expression=Literal(this=1, is_string=False))], return_type=False, encoding=False)], from_=From( this=Subquery( this=Select( expressions=[ Alias( this=Column( this=Identifier(this=pos, quoted=False)), alias=Identifier(this=idx, quoted=False)), Alias( this=Min( this=Column( this=Identifier(this=val, quoted=False))), alias=Identifier(this=min_h, quoted=False))], from_=From( this=Unnest( expressions=[ List( expressions=[ Placeholder(this=expr)])], alias=TableAlias( this=Identifier(this=_, quoted=False), columns=[ Identifier(this=sig, quoted=False)]), offset=False)), joins=[ Join( this=Unnest( expressions=[ Cast( this=JSONExtract( this=Column( this=Identifier(this=sig, quoted=False)), expression=JSONPath( expressions=[ JSONPathRoot(), JSONPathKey(this=state)]), only_json_types=False), to=DataType( this=DType.ARRAY, expressions=[ DataType(this=DType.USERDEFINED, kind=UBIGINT)], nested=True), _type=DataType( this=DType.ARRAY, expressions=[ DataType(this=DType.USERDEFINED, kind=UBIGINT)], nested=True))], alias=TableAlias( this=Identifier(this=t, quoted=False), columns=[ Identifier(this=val, quoted=False)]), offset=Identifier(this=pos, quoted=False)))], group=Group( expressions=[ Column( this=Identifier(this=pos, quoted=False))])))))
APPROXIMATE_SIMILARITY_TEMPLATE: sqlglot.expressions.core.Expr = Select( expressions=[ Div( this=Cast( this=Sum( this=Case( ifs=[ If( this=EQ( this=Column( this=Identifier(this=num_distinct, quoted=False)), expression=Literal(this=1, is_string=False)), true=Literal(this=1, is_string=False))], default=Literal(this=0, is_string=False))), to=DataType(this=DType.DOUBLE, nested=False), _type=DataType(this=DType.DOUBLE, nested=False)), expression=Count( this=Star(), big_int=True), typed=False, safe=False)], from_=From( this=Subquery( this=Select( expressions=[ Column( this=Identifier(this=pos, quoted=False)), Alias( this=Count( this=Distinct( expressions=[ Column( this=Identifier(this=h, quoted=False))]), big_int=True), alias=Identifier(this=num_distinct, quoted=False))], from_=From( this=Subquery( this=Select( expressions=[ Column( this=Identifier(this=h, quoted=False)), Column( this=Identifier(this=pos, quoted=False))], from_=From( this=Unnest( expressions=[ List( expressions=[ Placeholder(this=expr)])], alias=TableAlias( this=Identifier(this=_, quoted=False), columns=[ Identifier(this=sig, quoted=False)]), offset=False)), joins=[ Join( this=Unnest( expressions=[ Cast( this=JSONExtract( this=Column( this=Identifier(this=sig, quoted=False)), expression=JSONPath( expressions=[ JSONPathRoot(), JSONPathKey(this=state)]), only_json_types=False), to=DataType( this=DType.ARRAY, expressions=[ DataType(this=DType.USERDEFINED, kind=UBIGINT)], nested=True), _type=DataType( this=DType.ARRAY, expressions=[ DataType(this=DType.USERDEFINED, kind=UBIGINT)], nested=True))], alias=TableAlias( this=Identifier(this=s, quoted=False), columns=[ Identifier(this=h, quoted=False)]), offset=Identifier(this=pos, quoted=False)))]))), group=Group( expressions=[ Column( this=Identifier(this=pos, quoted=False))])))))
ARRAYS_ZIP_TEMPLATE: sqlglot.expressions.core.Expr = Case( ifs=[ If( this=Placeholder(this=null_check), true=Null()), If( this=Placeholder(this=all_empty_check), true=Array( expressions=[ Placeholder(this=empty_struct)]))], default=Anonymous( this=LIST_TRANSFORM, expressions=[ Anonymous( this=RANGE, expressions=[ Literal(this=0, is_string=False), Placeholder(this=max_len)]), Lambda( this=Placeholder(this=transform_struct), expressions=[ Identifier(this=__i, quoted=False)])]))
UUID_V5_TEMPLATE: sqlglot.expressions.core.Expr = Subquery( this=Select( expressions=[ Lower( this=DPipe( this=DPipe( this=DPipe( this=DPipe( this=DPipe( this=DPipe( this=DPipe( this=DPipe( this=DPipe( this=DPipe( this=Substring( this=Column( this=Identifier(this=h, quoted=False)), start=Literal(this=1, is_string=False), length=Literal(this=8, is_string=False)), expression=Literal(this='-', is_string=True), safe=True), expression=Substring( this=Column( this=Identifier(this=h, quoted=False)), start=Literal(this=9, is_string=False), length=Literal(this=4, is_string=False)), safe=True), expression=Literal(this='-', is_string=True), safe=True), expression=Literal(this='5', is_string=True), safe=True), expression=Substring( this=Column( this=Identifier(this=h, quoted=False)), start=Literal(this=14, is_string=False), length=Literal(this=3, is_string=False)), safe=True), expression=Literal(this='-', is_string=True), safe=True), expression=Format( this=Literal(this='{:02x}', is_string=True), expressions=[ BitwiseOr( this=BitwiseAnd( this=Cast( this=DPipe( this=Literal(this='0x', is_string=True), expression=Substring( this=Column( this=Identifier(this=h, quoted=False)), start=Literal(this=17, is_string=False), length=Literal(this=2, is_string=False)), safe=True), to=DataType(this=DType.INT, nested=False), _type=DataType(this=DType.INT, nested=False)), expression=Literal(this=63, is_string=False)), expression=Literal(this=128, is_string=False))]), safe=True), expression=Substring( this=Column( this=Identifier(this=h, quoted=False)), start=Literal(this=19, is_string=False), length=Literal(this=2, is_string=False)), safe=True), expression=Literal(this='-', is_string=True), safe=True), expression=Substring( this=Column( this=Identifier(this=h, quoted=False)), start=Literal(this=21, is_string=False), length=Literal(this=12, is_string=False)), safe=True))], from_=From( this=Subquery( this=Select( expressions=[ Alias( this=Substring( this=SHA( this=DPipe( this=Unhex( 
this=Replace( this=Placeholder(this=namespace), expression=Literal(this='-', is_string=True), replacement=Literal(this='', is_string=True))), expression=Encode( this=Placeholder(this=name), charset=Literal(this='utf8', is_string=True)), safe=True)), start=Literal(this=1, is_string=False), length=Literal(this=32, is_string=False)), alias=Identifier(this=h, quoted=False))])))))
ARRAY_BAG_TEMPLATE: sqlglot.expressions.core.Expr = Case( ifs=[ If( this=Or( this=Is( this=Placeholder(this=arr1), expression=Null()), expression=Is( this=Placeholder(this=arr2), expression=Null())), true=Null())], default=Anonymous( this=LIST_TRANSFORM, expressions=[ Anonymous( this=LIST_FILTER, expressions=[ Anonymous( this=LIST_ZIP, expressions=[ Placeholder(this=arr1), GenerateSeries( start=Literal(this=1, is_string=False), end=Length( this=Placeholder(this=arr1)))]), Lambda( this=Placeholder(this=cond), expressions=[ Identifier(this=pair, quoted=False)])]), Lambda( this=Bracket( this=Identifier(this=pair, quoted=False), expressions=[ Literal(this=0, is_string=False)]), expressions=[ Identifier(this=pair, quoted=False)])]))
ARRAY_EXCEPT_CONDITION: sqlglot.expressions.core.Expr = GT( this=Length( this=Anonymous( this=LIST_FILTER, expressions=[ Bracket( this=Placeholder(this=arr1), expressions=[ Slice( this=Literal(this=1, is_string=False), expression=Bracket( this=Column( this=Identifier(this=pair, quoted=False)), expressions=[ Literal(this=1, is_string=False)]))]), Lambda( this=NullSafeEQ( this=Identifier(this=e, quoted=False), expression=Bracket( this=Column( this=Identifier(this=pair, quoted=False)), expressions=[ Literal(this=0, is_string=False)])), expressions=[ Identifier(this=e, quoted=False)])])), expression=Length( this=Anonymous( this=LIST_FILTER, expressions=[ Placeholder(this=arr2), Lambda( this=NullSafeEQ( this=Identifier(this=e, quoted=False), expression=Bracket( this=Column( this=Identifier(this=pair, quoted=False)), expressions=[ Literal(this=0, is_string=False)])), expressions=[ Identifier(this=e, quoted=False)])])))
ARRAY_INTERSECTION_CONDITION: sqlglot.expressions.core.Expr = LTE( this=Length( this=Anonymous( this=LIST_FILTER, expressions=[ Bracket( this=Placeholder(this=arr1), expressions=[ Slice( this=Literal(this=1, is_string=False), expression=Bracket( this=Column( this=Identifier(this=pair, quoted=False)), expressions=[ Literal(this=1, is_string=False)]))]), Lambda( this=NullSafeEQ( this=Identifier(this=e, quoted=False), expression=Bracket( this=Column( this=Identifier(this=pair, quoted=False)), expressions=[ Literal(this=0, is_string=False)])), expressions=[ Identifier(this=e, quoted=False)])])), expression=Length( this=Anonymous( this=LIST_FILTER, expressions=[ Placeholder(this=arr2), Lambda( this=NullSafeEQ( this=Identifier(this=e, quoted=False), expression=Bracket( this=Column( this=Identifier(this=pair, quoted=False)), expressions=[ Literal(this=0, is_string=False)])), expressions=[ Identifier(this=e, quoted=False)])])))
ARRAY_EXCEPT_SET_TEMPLATE: sqlglot.expressions.core.Expr = Case( ifs=[ If( this=Or( this=Is( this=Placeholder(this=arr1), expression=Null()), expression=Is( this=Placeholder(this=arr2), expression=Null())), true=Null())], default=Anonymous( this=LIST_FILTER, expressions=[ Anonymous( this=LIST_DISTINCT, expressions=[ Placeholder(this=arr1)]), Lambda( this=EQ( this=Length( this=Anonymous( this=LIST_FILTER, expressions=[ Placeholder(this=arr2), Lambda( this=NullSafeEQ( this=Identifier(this=x, quoted=False), expression=Identifier(this=e, quoted=False)), expressions=[ Identifier(this=x, quoted=False)])])), expression=Literal(this=0, is_string=False)), expressions=[ Identifier(this=e, quoted=False)])]))
STRTOK_TO_ARRAY_TEMPLATE: sqlglot.expressions.core.Expr = Case( ifs=[ If( this=Is( this=Placeholder(this=delimiter), expression=Null()), true=Null())], default=Anonymous( this=LIST_FILTER, expressions=[ Anonymous( this=REGEXP_SPLIT_TO_ARRAY, expressions=[ Placeholder(this=string), Case( ifs=[ If( this=EQ( this=Placeholder(this=delimiter), expression=Literal(this='', is_string=True)), true=Literal(this='.^', is_string=True))], default=Concat( expressions=[ Literal(this='[', is_string=True), Placeholder(this=escaped), Literal(this=']', is_string=True)], safe=True, coalesce=False))]), Lambda( this=Not( this=EQ( this=Identifier(this=x, quoted=False), expression=Literal(this='', is_string=True))), expressions=[ Identifier(this=x, quoted=False)])]))
STRTOK_TEMPLATE: sqlglot.expressions.core.Expr = Case( ifs=[ If( this=And( this=EQ( this=Placeholder(this=delimiter), expression=Literal(this='', is_string=True)), expression=EQ( this=Placeholder(this=string), expression=Literal(this='', is_string=True))), true=Null()), If( this=And( this=EQ( this=Placeholder(this=delimiter), expression=Literal(this='', is_string=True)), expression=EQ( this=Placeholder(this=part_index), expression=Literal(this=1, is_string=False))), true=Placeholder(this=string)), If( this=EQ( this=Placeholder(this=delimiter), expression=Literal(this='', is_string=True)), true=Null()), If( this=LT( this=Placeholder(this=part_index), expression=Literal(this=0, is_string=False)), true=Null()), If( this=Or( this=Or( this=Is( this=Placeholder(this=string), expression=Null()), expression=Is( this=Placeholder(this=delimiter), expression=Null())), expression=Is( this=Placeholder(this=part_index), expression=Null())), true=Null())], default=Placeholder(this=base_func))
def timeslice_sql(self, expression: exp.TimeSlice) -> str:
    """
    Transform Snowflake's TIME_SLICE to DuckDB's time_bucket.

    Snowflake: TIME_SLICE(date_expr, slice_length, 'UNIT' [, 'START'|'END'])
    DuckDB:    time_bucket(INTERVAL 'slice_length' UNIT, date_expr)

    For 'END' kind, add the interval to get the end of the slice.
    For DATE type with 'END', cast result back to DATE to preserve type.
    """
    date_expr = expression.this
    slice_length = expression.expression
    unit = expression.unit
    kind = expression.text("kind").upper()

    # Create INTERVAL expression: INTERVAL 'N' UNIT
    interval_expr = exp.Interval(this=slice_length, unit=unit)

    # Create base time_bucket expression
    time_bucket_expr = exp.func("time_bucket", interval_expr, date_expr)

    # Default ('START' or unspecified): return the beginning of the slice.
    # Idiomatic comparison (`!=`) instead of `not ... == ...`.
    if kind != "END":
        return self.sql(time_bucket_expr)

    # For 'END', add the interval to get end of slice
    add_expr = exp.Add(this=time_bucket_expr, expression=interval_expr.copy())

    # If input is DATE type, cast result back to DATE to preserve type
    # DuckDB converts DATE to TIMESTAMP when adding intervals
    if date_expr.is_type(exp.DType.DATE):
        return self.sql(exp.cast(add_expr, exp.DType.DATE))

    return self.sql(add_expr)

Transform Snowflake's TIME_SLICE to DuckDB's time_bucket.

Snowflake: TIME_SLICE(date_expr, slice_length, 'UNIT' [, 'START'|'END']) DuckDB: time_bucket(INTERVAL 'slice_length' UNIT, date_expr)

For 'END' kind, add the interval to get the end of the slice. For DATE type with 'END', cast result back to DATE to preserve type.

def bitmapbucketnumber_sql(self, expression: exp.BitmapBucketNumber) -> str:
    """
    Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.

    Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
    - Each bucket covers 32,768 values
    - Bucket numbering starts at 1
    - Formula: ((value - 1) // 32768) + 1 for positive values

    Zero and negative inputs use value // 32768 instead, so they never map
    to bucket 0 or to a positive bucket id.
    """
    value = expression.this
    bucket_width = 32768

    # CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
    is_positive = exp.GT(this=value, expression=exp.Literal.number(0))
    case_expr = (
        exp.case()
        .when(is_positive, ((value - 1) // bucket_width) + 1)
        .else_(value // bucket_width)
    )
    return self.sql(case_expr)

Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.

Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:

  • Each bucket covers 32,768 values
  • Bucket numbering starts at 1
  • Formula: ((value - 1) // 32768) + 1 for positive values

For non-positive values (0 and negative), we use value // 32768 to avoid producing bucket 0 or positive bucket IDs for negative inputs.

def bitmapbitposition_sql(self, expression: exp.BitmapBitPosition) -> str:
    """
    Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.

    Snowflake's BITMAP_BIT_POSITION behavior:
    - For n <= 0: returns ABS(n) % 32768
    - For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
    """
    this = expression.this

    # IF(n > 0, n - 1, ABS(n)) selects the value to reduce modulo the bucket size.
    shifted = exp.If(
        this=exp.GT(this=this, expression=exp.Literal.number(0)),
        true=this - exp.Literal.number(1),
        false=exp.Abs(this=this),
    )
    modded = exp.Mod(this=exp.Paren(this=shifted), expression=MAX_BIT_POSITION)
    return self.sql(modded)

Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.

Snowflake's BITMAP_BIT_POSITION behavior:

  • For n <= 0: returns ABS(n) % 32768
  • For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
def bitmapconstructagg_sql(self, expression: exp.BitmapConstructAgg) -> str:
    """
    Transpile Snowflake's BITMAP_CONSTRUCT_AGG to DuckDB equivalent.
    Uses a pre-parsed template with placeholders replaced by expression nodes.

    Snowflake bitmap format:
    - Small (< 5 unique values): 2-byte count (big-endian) + values (little-endian) + padding to 10 bytes
    - Large (>= 5 unique values): 10-byte header (0x08 + 9 zeros) + values (little-endian)
    """
    filled = exp.replace_placeholders(
        self.BITMAP_CONSTRUCT_AGG_TEMPLATE, arg=expression.this
    )
    return f"({self.sql(filled)})"

Transpile Snowflake's BITMAP_CONSTRUCT_AGG to DuckDB equivalent. Uses a pre-parsed template with placeholders replaced by expression nodes.

Snowflake bitmap format:

  • Small (< 5 unique values): 2-byte count (big-endian) + values (little-endian) + padding to 10 bytes
  • Large (>= 5 unique values): 10-byte header (0x08 + 9 zeros) + values (little-endian)
def compress_sql(self, expression: exp.Compress) -> str:
    """COMPRESS has no DuckDB counterpart: warn, then emit a generic call."""
    self.unsupported("DuckDB does not support the COMPRESS() function")
    return self.function_fallback_sql(expression)
def encrypt_sql(self, expression: exp.Encrypt) -> str:
    """ENCRYPT has no DuckDB counterpart: warn, then emit a generic call."""
    self.unsupported("ENCRYPT is not supported in DuckDB")
    return self.function_fallback_sql(expression)
def decrypt_sql(self, expression: exp.Decrypt) -> str:
    """DECRYPT / TRY_DECRYPT cannot be transpiled: warn, then fall back."""
    if expression.args.get("safe"):
        func_name = "TRY_DECRYPT"
    else:
        func_name = "DECRYPT"
    self.unsupported(f"{func_name} is not supported in DuckDB")
    return self.function_fallback_sql(expression)
def decryptraw_sql(self, expression: exp.DecryptRaw) -> str:
    """DECRYPT_RAW / TRY_DECRYPT_RAW cannot be transpiled: warn, then fall back."""
    if expression.args.get("safe"):
        func_name = "TRY_DECRYPT_RAW"
    else:
        func_name = "DECRYPT_RAW"
    self.unsupported(f"{func_name} is not supported in DuckDB")
    return self.function_fallback_sql(expression)
def encryptraw_sql(self, expression: exp.EncryptRaw) -> str:
    """ENCRYPT_RAW has no DuckDB counterpart: warn, then emit a generic call."""
    self.unsupported("ENCRYPT_RAW is not supported in DuckDB")
    return self.function_fallback_sql(expression)
def parseurl_sql(self, expression: exp.ParseUrl) -> str:
    """PARSE_URL has no DuckDB counterpart: warn, then emit a generic call."""
    self.unsupported("PARSE_URL is not supported in DuckDB")
    return self.function_fallback_sql(expression)
def parseip_sql(self, expression: exp.ParseIp) -> str:
    """PARSE_IP has no DuckDB counterpart: warn, then emit a generic call."""
    self.unsupported("PARSE_IP is not supported in DuckDB")
    return self.function_fallback_sql(expression)
def decompressstring_sql(self, expression: exp.DecompressString) -> str:
    """DECOMPRESS_STRING has no DuckDB counterpart: warn, then emit a generic call."""
    self.unsupported("DECOMPRESS_STRING is not supported in DuckDB")
    return self.function_fallback_sql(expression)
def decompressbinary_sql(self, expression: exp.DecompressBinary) -> str:
    """DECOMPRESS_BINARY has no DuckDB counterpart: warn, then emit a generic call."""
    self.unsupported("DECOMPRESS_BINARY is not supported in DuckDB")
    return self.function_fallback_sql(expression)
def jarowinklersimilarity_sql(self, expression: exp.JarowinklerSimilarity) -> str:
    """
    Map Snowflake's JAROWINKLER_SIMILARITY onto DuckDB's JARO_WINKLER_SIMILARITY.

    Case-insensitive mode uppercases both operands first; integer_scale
    converts DuckDB's fractional score into Snowflake's 0-100 integer scale.
    """
    left = expression.this
    right = expression.expression

    if expression.args.get("case_insensitive"):
        left = exp.Upper(this=left)
        right = exp.Upper(this=right)

    result = exp.func("JARO_WINKLER_SIMILARITY", left, right)

    if expression.args.get("integer_scale"):
        result = exp.cast(result * 100, "INTEGER")

    return self.sql(result)
def nthvalue_sql(self, expression: exp.NthValue) -> str:
    """NTH_VALUE: DuckDB only counts from the first row, so warn on FROM LAST."""
    if not expression.args.get("from_first", True):
        self.unsupported("DuckDB's NTH_VALUE doesn't support starting from the end ")

    return self.function_fallback_sql(expression)
def randstr_sql(self, expression: exp.Randstr) -> str:
    """
    Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
    Uses a pre-parsed template with placeholders replaced by expression nodes.

    RANDSTR(length, generator) generates a random string of specified length.
    - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
    - With RANDOM(): Use RANDOM() in the hash for non-deterministic output
    - No generator: Use default seed value
    """
    generator = expression.args.get("generator")

    if generator is None:
        # No generator specified: arbitrary but deterministic default seed.
        seed_value: exp.Expr = exp.Literal.number(RANDSTR_SEED)
    elif isinstance(generator, exp.Rand):
        # RANDOM(): prefer its own seed when present, else RANDOM() itself.
        seed_value = generator.this or generator
    else:
        # Constant/int or other expression: used as the seed directly.
        seed_value = generator

    filled = exp.replace_placeholders(
        self.RANDSTR_TEMPLATE, seed=seed_value, length=expression.this
    )
    return f"({self.sql(filled)})"

Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random. Uses a pre-parsed template with placeholders replaced by expression nodes.

RANDSTR(length, generator) generates a random string of specified length.

  • With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
  • With RANDOM(): Use RANDOM() in the hash for non-deterministic output
  • No generator: Use default seed value
@unsupported_args("finish")
def reduce_sql(self, expression: exp.Reduce) -> str:
    """
    Transpile REDUCE to DuckDB's list_reduce(list, lambda[, initial]).

    The merge lambda is flagged with `colon` so it renders in DuckDB's
    `LAMBDA a, b: ...` form; the `finish` argument is unsupported.
    """
    merge_lambda = expression.args.get("merge")
    if merge_lambda:
        merge_lambda.set("colon", True)

    return self.func(
        "list_reduce", expression.this, merge_lambda, expression.args.get("initial")
    )
def zipf_sql(self, expression: exp.Zipf) -> str:
    """
    Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling.
    Uses a pre-parsed template with placeholders replaced by expression nodes.
    """
    s = expression.this
    n = expression.args["elementcount"]
    gen = expression.args["gen"]

    if isinstance(gen, exp.Rand):
        # RANDOM() generator: keep the output non-deterministic.
        random_expr: exp.Expr = exp.Rand()
    else:
        # Deterministic seed: (ABS(HASH(seed)) % 1000000) / 1000000.0
        hashed = exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()]))
        random_expr = exp.Div(
            this=exp.Paren(
                this=exp.Mod(this=hashed, expression=exp.Literal.number(1000000))
            ),
            expression=exp.Literal.number(1000000.0),
        )

    filled = exp.replace_placeholders(
        self.ZIPF_TEMPLATE, s=s, n=n, random_expr=random_expr
    )
    return f"({self.sql(filled)})"

Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling. Uses a pre-parsed template with placeholders replaced by expression nodes.

def tobinary_sql(self, expression: exp.ToBinary) -> str:
    """
    TO_BINARY and TRY_TO_BINARY transpilation:
    - 'HEX': TO_BINARY('48454C50', 'HEX') -> UNHEX('48454C50')
    - 'UTF-8': TO_BINARY('TEST', 'UTF-8') -> ENCODE('TEST')
    - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') -> FROM_BASE64('SEVMUA==')

    For TRY_TO_BINARY (safe=True), wrap with TRY():
    - 'HEX': TRY_TO_BINARY('invalid', 'HEX') -> TRY(UNHEX('invalid'))
    """
    value = expression.this
    format_arg = expression.args.get("format")
    is_safe = expression.args.get("safe")

    if not format_arg and not _is_binary(expression):
        return self.func("TRY_TO_BINARY" if is_safe else "TO_BINARY", value)

    # Snowflake defaults to HEX encoding when no format is specified.
    fmt = format_arg.name.upper() if format_arg else "HEX"

    if fmt == "HEX":
        result = self.func("UNHEX", value)
    elif fmt in ("UTF-8", "UTF8"):
        # DuckDB's ENCODE always uses UTF-8, no charset parameter needed.
        result = self.func("ENCODE", value)
    elif fmt == "BASE64":
        result = self.func("FROM_BASE64", value)
    elif is_safe:
        # Unknown format under TRY_TO_BINARY degrades to NULL.
        return self.sql(exp.null())
    else:
        self.unsupported(f"format {fmt} is not supported")
        result = self.func("TO_BINARY", value)

    return f"TRY({result})" if is_safe else result

TO_BINARY and TRY_TO_BINARY transpilation:

  • 'HEX': TO_BINARY('48454C50', 'HEX') -> UNHEX('48454C50')
  • 'UTF-8': TO_BINARY('TEST', 'UTF-8') -> ENCODE('TEST')
  • 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') -> FROM_BASE64('SEVMUA==')

For TRY_TO_BINARY (safe=True), wrap with TRY():

  • 'HEX': TRY_TO_BINARY('invalid', 'HEX') -> TRY(UNHEX('invalid'))
def tonumber_sql(self, expression: exp.ToNumber) -> str:
    """TO_NUMBER with precision/scale but no format model becomes a DECIMAL cast."""
    fmt = expression.args.get("format")
    precision = expression.args.get("precision")
    scale = expression.args.get("scale")

    if fmt or not (precision and scale):
        return super().tonumber_sql(expression)

    target_type = f"DECIMAL({precision.name}, {scale.name})"
    return self.sql(exp.cast(expression.this, target_type, dialect="duckdb"))
def generator_sql(self, expression: exp.Generator) -> str:
    """
    Transpile Snowflake's GENERATOR table function to DuckDB's range().

    TIMELIMIT has no DuckDB counterpart and only triggers a warning; a
    missing ROWCOUNT degrades to range(0) (an empty result) with a warning.
    """
    if expression.args.get("time_limit"):
        self.unsupported("GENERATOR TIMELIMIT parameter is not supported in DuckDB")

    rowcount = expression.args.get("rowcount")
    if rowcount:
        return self.func("range", rowcount)

    self.unsupported("GENERATOR without ROWCOUNT is not supported in DuckDB")
    return self.func("range", exp.Literal.number(0))
def greatest_sql(self, expression: exp.Greatest) -> str:
    """GREATEST: delegate to the shared GREATEST/LEAST renderer."""
    return self._greatest_least_sql(expression)
def least_sql(self, expression: exp.Least) -> str:
    """LEAST: delegate to the shared GREATEST/LEAST renderer."""
    return self._greatest_least_sql(expression)
def lambda_sql(self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True) -> str:
    """Render a lambda; the "colon" arg selects DuckDB's `LAMBDA x: ...` syntax
    (colon separator, no wrapping) instead of the arrow form."""
    if expression.args.get("colon"):
        rendered = super().lambda_sql(expression, arrow_sep=":", wrap=False)
        return f"LAMBDA {rendered}"

    return super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap)
def show_sql(self, expression: exp.Show) -> str:
    """Render a SHOW statement with its optional FROM clause."""
    from_sql = self.sql(expression, "from_")
    suffix = f" FROM {from_sql}" if from_sql else ""
    return f"SHOW {expression.name}{suffix}"
def soundex_sql(self, expression: exp.Soundex) -> str:
    # No DuckDB equivalent: warn, then emit the call verbatim so the SQL
    # round-trips even though it won't execute on DuckDB.
    self.unsupported("SOUNDEX is not supported in DuckDB")
    return self.func("SOUNDEX", expression.this)
def sortarray_sql(self, expression: exp.SortArray) -> str:
    """Render SORT_ARRAY as DuckDB LIST_SORT / ARRAY_REVERSE_SORT.

    Literal boolean flags are folded into LIST_SORT's string arguments
    ('ASC'/'DESC', 'NULLS FIRST'); non-literal flags are passed through as-is.
    """
    arr = expression.this
    asc = expression.args.get("asc")
    nulls_first = expression.args.get("nulls_first")

    # Neither flag is a literal boolean: pass both through untouched.
    if not isinstance(asc, exp.Boolean) and not isinstance(nulls_first, exp.Boolean):
        return self.func("LIST_SORT", arr, asc, nulls_first)

    nulls_are_first = nulls_first == exp.true()
    nulls_first_sql = exp.Literal.string("NULLS FIRST") if nulls_are_first else None

    # Only nulls_first is a literal boolean: translate it, keep asc as-is.
    if not isinstance(asc, exp.Boolean):
        return self.func("LIST_SORT", arr, asc, nulls_first_sql)

    descending = asc == exp.false()

    # Ascending with default null placement needs no extra arguments.
    if not descending and not nulls_are_first:
        return self.func("LIST_SORT", arr)
    # Descending with default null placement maps to ARRAY_REVERSE_SORT.
    if not nulls_are_first:
        return self.func("ARRAY_REVERSE_SORT", arr)
    # Remaining cases spell out both the direction and NULLS FIRST explicitly.
    return self.func(
        "LIST_SORT",
        arr,
        exp.Literal.string("DESC" if descending else "ASC"),
        exp.Literal.string("NULLS FIRST"),
    )
def install_sql(self, expression: exp.Install) -> str:
    """Render INSTALL, honoring the optional FORCE modifier and FROM source."""
    force_prefix = "FORCE " if expression.args.get("force") else ""
    target = self.sql(expression, "this")
    source = expression.args.get("from_")
    suffix = f" FROM {source}" if source else ""
    return f"{force_prefix}INSTALL {target}{suffix}"
def approxtopk_sql(self, expression: exp.ApproxTopK) -> str:
    # The return shape of APPROX_TOP_K differs between dialects, so a faithful
    # transpilation isn't possible: warn and emit the call unchanged.
    self.unsupported(
        "APPROX_TOP_K cannot be transpiled to DuckDB due to incompatible return types. "
    )
    return self.function_fallback_sql(expression)
def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
    """FROM_ISO8601_TIMESTAMP becomes a plain cast to TIMESTAMPTZ."""
    cast = exp.cast(expression.this, exp.DType.TIMESTAMPTZ)
    return self.sql(cast)
def strposition_sql(self, expression: exp.StrPosition) -> str:
    """Render STRPOS/POSITION, special-casing binary inputs and position clamping."""
    this = expression.this
    substr = expression.args.get("substr")
    position = expression.args.get("position")

    # For BINARY/BLOB: DuckDB's STRPOS doesn't support BLOB types
    # Convert to HEX strings, use STRPOS, then convert hex position to byte position
    if _is_binary(this):
        # Build expression: STRPOS(HEX(haystack), HEX(needle))
        hex_strpos = exp.StrPosition(
            this=exp.Hex(this=this),
            substr=exp.Hex(this=substr),
        )

        # Each byte is two hex characters, so map the hex offset back to bytes.
        return self.sql(exp.cast((hex_strpos + 1) / 2, exp.DType.INT))

    # For VARCHAR: handle clamp_position
    # clamp_position: non-positive start positions are clamped up to 1.
    if expression.args.get("clamp_position") and position:
        expression = expression.copy()
        expression.set(
            "position",
            exp.If(
                this=exp.LTE(this=position, expression=exp.Literal.number(0)),
                true=exp.Literal.number(1),
                false=position.copy(),
            ),
        )

    # Delegate the common case to the dialect-shared helper.
    return strposition_sql(self, expression)
def substring_sql(self, expression: exp.Substring) -> str:
    """Render SUBSTRING, normalizing zero-based start semantics.

    When the "zero_start" arg is set, a start of 0 is mapped to 1 (DuckDB is
    1-based) and a negative length is clamped to 0 via inline IF expressions.
    """
    if expression.args.get("zero_start"):
        # NOTE: the original fetched start/length twice; the first pair of
        # assignments was dead code immediately shadowed by the walrus fetches.
        if start := expression.args.get("start"):
            start = exp.If(this=start.eq(0), true=exp.Literal.number(1), false=start)
        if length := expression.args.get("length"):
            length = exp.If(this=length < 0, true=exp.Literal.number(0), false=length)

        return self.func("SUBSTRING", expression.this, start, length)

    return self.function_fallback_sql(expression)
def strtotime_sql(self, expression: exp.StrToTime) -> str:
    """Render STR_TO_TIME via STRPTIME/TRY_STRPTIME, casting to TIMESTAMPTZ
    for the timezone-carrying target types."""
    # Check if target_type requires TIMESTAMPTZ (for LTZ/TZ variants)
    target_type = expression.args.get("target_type")
    needs_tz = target_type and target_type.this in (
        exp.DType.TIMESTAMPLTZ,
        exp.DType.TIMESTAMPTZ,
    )

    if expression.args.get("safe"):
        # Safe variant: TRY_STRPTIME yields NULL instead of raising on bad input.
        formatted_time = self.format_time(expression)
        cast_type = exp.DType.TIMESTAMPTZ if needs_tz else exp.DType.TIMESTAMP
        return self.sql(
            exp.cast(self.func("TRY_STRPTIME", expression.this, formatted_time), cast_type)
        )

    base_sql = str_to_time_sql(self, expression)
    if needs_tz:
        return self.sql(
            exp.cast(
                base_sql,
                exp.DataType(this=exp.DType.TIMESTAMPTZ),
            )
        )
    return base_sql
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    """Render STR_TO_DATE as (TRY_)STRPTIME(...) cast to DATE."""
    formatted_time = self.format_time(expression)
    # The safe variant uses TRY_STRPTIME, which returns NULL on parse failure.
    func_name = "TRY_STRPTIME" if expression.args.get("safe") else "STRPTIME"
    parsed = self.func(func_name, expression.this, formatted_time)
    return self.sql(exp.cast(parsed, exp.DataType(this=exp.DType.DATE)))
def tsordstotime_sql(self, expression: exp.TsOrDsToTime) -> str:
    """Render TS_OR_DS_TO_TIME as a (TRY_)CAST to TIME, parsing with
    (TRY_)STRPTIME first when an explicit format is given."""
    this = expression.this
    time_format = self.format_time(expression)
    safe = expression.args.get("safe")
    time_type = exp.DataType.from_str("TIME", dialect="duckdb")
    # Safe mode uses TryCast so conversion failures become NULL.
    cast_expr = exp.TryCast if safe else exp.Cast

    if time_format:
        # With a format string, parse first, then cast the result to TIME.
        func_name = "TRY_STRPTIME" if safe else "STRPTIME"
        strptime = exp.Anonymous(this=func_name, expressions=[this, time_format])
        return self.sql(cast_expr(this=strptime, to=time_type))

    # Already TIME-typed (or a nested conversion): no cast needed.
    if isinstance(this, exp.TsOrDsToTime) or this.is_type(exp.DType.TIME):
        return self.sql(this)

    return self.sql(cast_expr(this=this, to=time_type))
def currentdate_sql(self, expression: exp.CurrentDate) -> str:
    """Render CURRENT_DATE; with a zone argument, cast
    CURRENT_TIMESTAMP AT TIME ZONE <zone> down to DATE."""
    zone = expression.this
    if not zone:
        return "CURRENT_DATE"

    at_zone = exp.AtTimeZone(this=exp.CurrentTimestamp(), zone=zone)
    return self.sql(exp.Cast(this=at_zone, to=exp.DataType(this=exp.DType.DATE)))
def checkjson_sql(self, expression: exp.CheckJson) -> str:
    """Render CHECK_JSON: NULL for NULL/empty/valid-JSON input, otherwise the
    literal string 'Invalid JSON'."""
    arg = expression.this
    return self.sql(
        exp.case()
        .when(
            # NULL input, the empty string, and valid JSON all yield NULL.
            exp.or_(arg.is_(exp.Null()), arg.eq(""), exp.func("json_valid", arg)),
            exp.null(),
        )
        .else_(exp.Literal.string("Invalid JSON"))
    )
def parsejson_sql(self, expression: exp.ParseJSON) -> str:
    """Render PARSE_JSON; the safe variant guards with json_valid and
    produces NULL for invalid input instead of an error."""
    arg = expression.this

    if not expression.args.get("safe"):
        return self.func("JSON", arg)

    guarded = (
        exp.case()
        .when(exp.func("json_valid", arg), exp.cast(arg.copy(), "JSON"))
        .else_(exp.null())
    )
    return self.sql(guarded)
def unicode_sql(self, expression: exp.Unicode) -> str:
    """Render UNICODE; with "empty_is_zero", the empty string maps to 0."""
    this = expression.this

    if not expression.args.get("empty_is_zero"):
        return self.func("UNICODE", this)

    zero_for_empty = (
        exp.case()
        .when(this.eq(exp.Literal.string("")), exp.Literal.number(0))
        .else_(exp.Anonymous(this="UNICODE", expressions=[this]))
    )
    return self.sql(zero_for_empty)
def stripnullvalue_sql(self, expression: exp.StripNullValue) -> str:
    """Render STRIP_NULL_VALUE: a JSON null becomes SQL NULL, anything else
    passes through unchanged."""
    this = expression.this
    case = (
        exp.case()
        .when(exp.func("json_type", this).eq("NULL"), exp.null())
        .else_(this)
    )
    return self.sql(case)
def trunc_sql(self, expression: exp.Trunc) -> str:
    """Render TRUNC, casting a non-integer decimals argument to INT when the
    source dialect allows fractional decimal counts."""
    decimals = expression.args.get("decimals")

    needs_int_cast = (
        expression.args.get("fractions_supported")
        and decimals
        and not decimals.is_type(exp.DType.INT)
    )
    if needs_int_cast:
        decimals = exp.cast(decimals, exp.DType.INT, dialect="duckdb")

    return self.func("TRUNC", expression.this, decimals)
def normal_sql(self, expression: exp.Normal) -> str:
    """
    Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB.

    Uses the Box-Muller transform via NORMAL_TEMPLATE.
    """
    mean = expression.this
    stddev = expression.args["stddev"]
    gen: exp.Expr = expression.args["gen"]

    # Build two uniform random values [0, 1) for Box-Muller transform
    if isinstance(gen, exp.Rand) and gen.this is None:
        # Unseeded RANDOM(): two independent draws.
        u1: exp.Expr = exp.Rand()
        u2: exp.Expr = exp.Rand()
    else:
        # Seeded: derive two values using HASH with different inputs
        # (seed and seed + 1) so u1 and u2 differ deterministically.
        seed = gen.this if isinstance(gen, exp.Rand) else gen
        u1 = exp.replace_placeholders(self.SEEDED_RANDOM_TEMPLATE, seed=seed)
        u2 = exp.replace_placeholders(
            self.SEEDED_RANDOM_TEMPLATE,
            seed=exp.Add(this=seed.copy(), expression=exp.Literal.number(1)),
        )

    replacements = {"mean": mean, "stddev": stddev, "u1": u1, "u2": u2}
    return self.sql(exp.replace_placeholders(self.NORMAL_TEMPLATE, **replacements))

Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB.

Uses the Box-Muller transform via NORMAL_TEMPLATE.

def uniform_sql(self, expression: exp.Uniform) -> str:
    """
    Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB.

    UNIFORM returns a random value in [min, max]:
    - Integer result if both min and max are integers
    - Float result if either min or max is a float
    """
    min_val = expression.this
    max_val = expression.expression
    gen = expression.args.get("gen")

    # Determine if result should be integer (both bounds are integers).
    # We do this to emulate Snowflake's behavior, INT -> INT, FLOAT -> FLOAT
    is_int_result = min_val.is_int and max_val.is_int

    # Build the random value expression [0, 1)
    if not isinstance(gen, exp.Rand):
        # Seed value: (ABS(HASH(seed)) % 1000000) / 1000000.0
        random_expr: exp.Expr = exp.Div(
            this=exp.Paren(
                this=exp.Mod(
                    this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen])),
                    expression=exp.Literal.number(1000000),
                )
            ),
            expression=exp.Literal.number(1000000.0),
        )
    else:
        random_expr = exp.Rand()

    # Build: min + random * (max - min [+ 1 for int])
    # The +1 widens the integer range so max itself is reachable after FLOOR.
    range_expr: exp.Expr = exp.Sub(this=max_val, expression=min_val)
    if is_int_result:
        range_expr = exp.Add(this=range_expr, expression=exp.Literal.number(1))

    result: exp.Expr = exp.Add(
        this=min_val,
        expression=exp.Mul(this=random_expr, expression=exp.Paren(this=range_expr)),
    )

    # Integer bounds: floor and cast so the result type matches Snowflake's.
    if is_int_result:
        result = exp.Cast(this=exp.Floor(this=result), to=exp.DType.BIGINT.into_expr())

    return self.sql(result)

Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB.

UNIFORM returns a random value in [min, max]:

  • Integer result if both min and max are integers
  • Float result if either min or max is a float
def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
    """Render TIME_FROM_PARTS as MAKE_TIME, emulating out-of-range ("overflow")
    components with INTERVAL arithmetic when the overflow arg is set."""
    nano = expression.args.get("nano")
    overflow = expression.args.get("overflow")

    # Snowflake's TIME_FROM_PARTS supports overflow
    if overflow:
        hour = expression.args["hour"]
        minute = expression.args["min"]
        sec = expression.args["sec"]

        # Check if values are within normal ranges - use MAKE_TIME for efficiency
        if not nano and all(arg.is_int for arg in [hour, minute, sec]):
            try:
                h_val = hour.to_py()
                m_val = minute.to_py()
                s_val = sec.to_py()
                if 0 <= h_val <= 23 and 0 <= m_val <= 59 and 0 <= s_val <= 59:
                    return rename_func("MAKE_TIME")(self, expression)
            except ValueError:
                # Non-numeric literal: fall through to interval arithmetic.
                pass

        # Overflow or nanoseconds detected - use INTERVAL arithmetic
        if nano:
            # Fold nanoseconds into the seconds term (pop removes it from args).
            sec = sec + nano.pop() / exp.Literal.number(1000000000.0)

        total_seconds = hour * exp.Literal.number(3600) + minute * exp.Literal.number(60) + sec

        # TIME '00:00:00' + INTERVAL <total_seconds> SECOND
        return self.sql(
            exp.Add(
                this=exp.Cast(
                    this=exp.Literal.string("00:00:00"), to=exp.DType.TIME.into_expr()
                ),
                expression=exp.Interval(this=total_seconds, unit=exp.var("SECOND")),
            )
        )

    # Default: MAKE_TIME
    if nano:
        # Fold nanoseconds into the seconds argument before renaming.
        expression.set(
            "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
        )

    return rename_func("MAKE_TIME")(self, expression)
def extract_sql(self, expression: exp.Extract) -> str:
    """
    Transpile EXTRACT/DATE_PART for DuckDB, handling specifiers not natively supported.

    DuckDB doesn't support: WEEKISO, YEAROFWEEK, YEAROFWEEKISO, NANOSECOND,
    EPOCH_SECOND (as integer), EPOCH_MILLISECOND, EPOCH_MICROSECOND, EPOCH_NANOSECOND
    """
    this = expression.this
    datetime_expr = expression.expression

    # TIMESTAMPTZ extractions may produce different results between Snowflake and DuckDB
    # because Snowflake applies server timezone while DuckDB uses local timezone
    if datetime_expr.is_type(exp.DType.TIMESTAMPTZ, exp.DType.TIMESTAMPLTZ):
        self.unsupported(
            "EXTRACT from TIMESTAMPTZ / TIMESTAMPLTZ may produce different results due to timezone handling differences"
        )

    part_name = this.name.upper()

    # Specifiers emulated via STRFTIME + cast (format/type mapping on the class).
    if part_name in self.EXTRACT_STRFTIME_MAPPINGS:
        fmt, cast_type = self.EXTRACT_STRFTIME_MAPPINGS[part_name]

        # Problem: strftime doesn't accept TIME and there's no NANOSECOND function
        # So, for NANOSECOND with TIME, fallback to MICROSECOND * 1000
        is_nano_time = part_name == "NANOSECOND" and datetime_expr.is_type(
            exp.DType.TIME, exp.DType.TIMETZ
        )

        if is_nano_time:
            self.unsupported("Parameter NANOSECOND is not supported with TIME type in DuckDB")
            return self.sql(
                exp.cast(
                    exp.Mul(
                        this=exp.Extract(this=exp.var("MICROSECOND"), expression=datetime_expr),
                        expression=exp.Literal.number(1000),
                    ),
                    exp.DataType.from_str(cast_type, dialect="duckdb"),
                )
            )

        # For NANOSECOND, cast to TIMESTAMP_NS to preserve nanosecond precision
        strftime_input = datetime_expr
        if part_name == "NANOSECOND":
            strftime_input = exp.cast(datetime_expr, exp.DType.TIMESTAMP_NS)

        return self.sql(
            exp.cast(
                exp.Anonymous(
                    this="STRFTIME",
                    expressions=[strftime_input, exp.Literal.string(fmt)],
                ),
                exp.DataType.from_str(cast_type, dialect="duckdb"),
            )
        )

    # Epoch-flavored specifiers map to dedicated functions.
    if part_name in self.EXTRACT_EPOCH_MAPPINGS:
        func_name = self.EXTRACT_EPOCH_MAPPINGS[part_name]
        result: exp.Expr = exp.Anonymous(this=func_name, expressions=[datetime_expr])
        # EPOCH returns float, cast to BIGINT for integer result
        if part_name == "EPOCH_SECOND":
            result = exp.cast(result, exp.DataType.from_str("BIGINT", dialect="duckdb"))
        return self.sql(result)

    return super().extract_sql(expression)

Transpile EXTRACT/DATE_PART for DuckDB, handling specifiers not natively supported.

DuckDB doesn't support: WEEKISO, YEAROFWEEK, YEAROFWEEKISO, NANOSECOND, EPOCH_SECOND (as integer), EPOCH_MILLISECOND, EPOCH_MICROSECOND, EPOCH_NANOSECOND

def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    """Render TIMESTAMP_FROM_PARTS: the two-argument date/time form becomes
    DATE + TIME; the component form becomes MAKE_TIMESTAMP, with milli/nano
    folded into the seconds argument."""
    # Check if this is the date/time expression form: TIMESTAMP_FROM_PARTS(date_expr, time_expr)
    date_expr = expression.this
    time_expr = expression.expression

    if date_expr is not None and time_expr is not None:
        # In DuckDB, DATE + TIME produces TIMESTAMP
        return self.sql(exp.Add(this=date_expr, expression=time_expr))

    # Component-based form: TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second, ...)
    sec = expression.args.get("sec")
    if sec is None:
        # This shouldn't happen with valid input, but handle gracefully
        return rename_func("MAKE_TIMESTAMP")(self, expression)

    # Fold sub-second components into sec; pop() detaches them from args so
    # MAKE_TIMESTAMP doesn't receive them as separate arguments.
    milli = expression.args.get("milli")
    if milli is not None:
        sec += milli.pop() / exp.Literal.number(1000.0)

    nano = expression.args.get("nano")
    if nano is not None:
        sec += nano.pop() / exp.Literal.number(1000000000.0)

    if milli or nano:
        expression.set("sec", sec)

    return rename_func("MAKE_TIMESTAMP")(self, expression)
@unsupported_args("nano")
def timestampltzfromparts_sql(self, expression: exp.TimestampLtzFromParts) -> str:
    """Render TIMESTAMP_LTZ_FROM_PARTS as MAKE_TIMESTAMP cast to TIMESTAMPTZ."""
    # Detach nano so rename_func only passes args MAKE_TIMESTAMP accepts.
    nano = expression.args.get("nano")
    if nano:
        nano.pop()

    timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
    return f"CAST({timestamp} AS TIMESTAMPTZ)"
@unsupported_args("nano")
def timestamptzfromparts_sql(self, expression: exp.TimestampTzFromParts) -> str:
    """Render TIMESTAMP_TZ_FROM_PARTS as MAKE_TIMESTAMP, applying
    AT TIME ZONE when an explicit zone argument is present."""
    # Detach zone and nano first so MAKE_TIMESTAMP only sees args it accepts.
    zone = expression.args.get("zone")
    if zone:
        zone = zone.pop()

    nano = expression.args.get("nano")
    if nano:
        nano.pop()

    timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)

    if zone:
        return f"{timestamp} AT TIME ZONE {self.sql(zone)}"

    return timestamp
def tablesample_sql(
    self,
    expression: exp.TableSample,
    tablesample_keyword: str | None = None,
) -> str:
    """Render a sample clause, forcing the TABLESAMPLE keyword for per-source
    samples and reservoir sampling for discrete row counts."""
    if not isinstance(expression.parent, exp.Select):
        # This sample clause only applies to a single source, not the entire resulting relation
        tablesample_keyword = "TABLESAMPLE"

    if expression.args.get("size"):
        method = expression.args.get("method")
        if method and method.name.upper() != "RESERVOIR":
            self.unsupported(
                f"Sampling method {method} is not supported with a discrete sample count, "
                "defaulting to reservoir sampling"
            )
            expression.set("method", exp.var("RESERVOIR"))

    return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
def join_sql(self, expression: exp.Join) -> str:
    """Render a JOIN, normalizing condition-less joins for DuckDB."""
    if (
        not expression.args.get("using")
        and not expression.args.get("on")
        and not expression.method
        and (expression.kind in ("", "INNER", "OUTER"))
    ):
        # Some dialects support `LEFT/INNER JOIN UNNEST(...)` without an explicit ON clause
        # DuckDB doesn't, but we can just add a dummy ON clause that is always true
        if isinstance(expression.this, exp.Unnest):
            return super().join_sql(expression.on(exp.true()))

        # Otherwise drop side/kind, leaving a plain (cross-style) JOIN.
        expression.set("side", None)
        expression.set("kind", None)

    return super().join_sql(expression)
def countif_sql(self, expression: exp.CountIf) -> str:
    """COUNT_IF is emitted natively on DuckDB >= 1.2; older versions get a
    SUM-based rewrite."""
    if self.dialect.version < (1, 2):
        # https://github.com/tobymao/sqlglot/pull/4749
        return count_if_to_sum(self, expression)

    return self.function_fallback_sql(expression)
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render bracket subscripts, emulating pre-1.2 DuckDB map-access semantics."""
    if self.dialect.version >= (1, 2):
        return super().bracket_sql(expression)

    # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
    this = expression.this
    if isinstance(this, exp.Array):
        # Parenthesize array literals so the subscript binds correctly.
        this.replace(exp.paren(this))

    bracket = super().bracket_sql(expression)

    if not expression.args.get("returns_list_for_maps"):
        if not this.type:
            # Type info is needed to detect MAP access; annotate lazily.
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(exp.DType.MAP):
            # Pre-1.2 map subscripts return a list; take its first element.
            bracket = f"({bracket})[1]"

    return bracket
def withingroup_sql(self, expression: exp.WithinGroup) -> str:
    """Render WITHIN GROUP, folding the ORDER BY into the aggregate call where
    DuckDB requires it."""
    func = expression.this

    # For ARRAY_AGG, DuckDB requires ORDER BY inside the function, not in WITHIN GROUP
    # Transform: ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) -> ARRAY_AGG(x ORDER BY y)
    if isinstance(func, exp.ArrayAgg):
        if not isinstance(order := expression.expression, exp.Order):
            return self.sql(func)

        # Save the original column for FILTER clause (before wrapping with Order)
        original_this = func.this

        # Move ORDER BY inside ARRAY_AGG by wrapping its argument with Order
        # ArrayAgg.this should become Order(this=ArrayAgg.this, expressions=order.expressions)
        func.set(
            "this",
            exp.Order(
                this=func.this.copy(),
                expressions=order.expressions,
            ),
        )

        # Generate the ARRAY_AGG function with ORDER BY and add FILTER clause if needed
        # Use original_this (not the Order-wrapped version) for the FILTER condition
        array_agg_sql = self.function_fallback_sql(func)
        return self._add_arrayagg_null_filter(array_agg_sql, func, original_this)

    # For other functions (like PERCENTILES), use existing logic
    expression_sql = self.sql(expression, "expression")

    if isinstance(func, exp.PERCENTILES):
        # Make the order key the first arg and slide the fraction to the right
        # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
        order_col = expression.find(exp.Ordered)
        if order_col:
            func.set("expression", func.this)
            func.set("this", order_col.this)

    # Splice the rendered WITHIN GROUP body inside the function's closing paren.
    this = self.sql(expression, "this").rstrip(")")

    return f"{this}{expression_sql})"
def length_sql(self, expression: exp.Length) -> str:
    """Render LENGTH, resolving binary-capable inputs via type inference or a
    runtime TYPEOF dispatch."""
    arg = expression.this

    # Dialects like BQ and Snowflake also accept binary values as args, so
    # DDB will attempt to infer the type or resort to case/when resolution
    if not expression.args.get("binary") or arg.is_string:
        return self.func("LENGTH", arg)

    if not arg.type:
        # No type annotated yet: infer it before deciding on a strategy.
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        return self.func("LENGTH", arg)

    # We need these casts to make duckdb's static type checker happy
    blob = exp.cast(arg, exp.DType.VARBINARY)
    varchar = exp.cast(arg, exp.DType.VARCHAR)

    # Unknown static type: branch at runtime on TYPEOF(arg).
    case = (
        exp.case(exp.Anonymous(this="TYPEOF", expressions=[arg]))
        .when(exp.Literal.string("BLOB"), exp.ByteLength(this=blob))
        .else_(exp.Anonymous(this="LENGTH", expressions=[varchar]))
    )
    return self.sql(case)
def bitlength_sql(self, expression: exp.BitLength) -> str:
    """Render BIT_LENGTH; binary inputs go through BYTE_LENGTH(blob) * 8."""
    arg = expression.this

    if _is_binary(arg):
        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
        return self.sql(exp.ByteLength(this=blob) * exp.Literal.number(8))

    return self.func("BIT_LENGTH", arg)
def chr_sql(self, expression: exp.Chr, name: str = "CHR") -> str:
    """Render CHR, casting real-typed codepoint arguments to INT first."""
    codepoint = expression.expressions[0]
    if codepoint.is_type(*exp.DataType.REAL_TYPES):
        codepoint = exp.cast(codepoint, exp.DType.INT)
    return self.func("CHR", codepoint)
def collation_sql(self, expression: exp.Collation) -> str:
    # No DuckDB equivalent: warn, then emit the call verbatim via the
    # generic function fallback.
    self.unsupported("COLLATION function is not supported by DuckDB")
    return self.function_fallback_sql(expression)
def collate_sql(self, expression: exp.Collate) -> str:
    """Translate a Snowflake-style '-'-separated collation string into DuckDB's
    '.'-separated form, dropping default specifiers and warning on (but keeping)
    specifiers with no DuckDB equivalent."""
    # Non-literal collation expressions are rendered by the base generator.
    if not expression.expression.is_string:
        return super().collate_sql(expression)

    raw = expression.expression.name
    if not raw:
        # Empty collation string: just render the collated expression itself.
        return self.sql(expression.this)

    parts = []
    for part in raw.split("-"):
        lower = part.lower()
        if lower not in _SNOWFLAKE_COLLATION_DEFAULTS:
            if lower in _SNOWFLAKE_COLLATION_UNSUPPORTED:
                self.unsupported(
                    f"Snowflake collation specifier '{part}' has no DuckDB equivalent"
                )
            parts.append(lower)

    # All specifiers were defaults: no COLLATE clause needed.
    if not parts:
        return self.sql(expression.this)
    return super().collate_sql(
        exp.Collate(this=expression.this, expression=exp.var(".".join(parts)))
    )
def regexpcount_sql(self, expression: sqlglot.expressions.string.RegexpCount) -> str:
3170    def regexpcount_sql(self, expression: exp.RegexpCount) -> str:
3171        this = expression.this
3172        pattern = expression.expression
3173        position = expression.args.get("position")
3174        parameters = expression.args.get("parameters")
3175
3176        # Validate flags - only "ims" flags are supported for embedded patterns
3177        validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims")
3178
3179        if position:
3180            this = exp.Substring(this=this, start=position)
3181
3182        # Embed flags in pattern (REGEXP_EXTRACT_ALL doesn't support flags argument)
3183        if validated_flags:
3184            pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern])
3185
3186        # Handle empty pattern: Snowflake returns 0, DuckDB would match between every character
3187        result = (
3188            exp.case()
3189            .when(
3190                exp.EQ(this=pattern, expression=exp.Literal.string("")),
3191                exp.Literal.number(0),
3192            )
3193            .else_(
3194                exp.Length(
3195                    this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern])
3196                )
3197            )
3198        )
3199
3200        return self.sql(result)
def regexpreplace_sql(self, expression: sqlglot.expressions.string.RegexpReplace) -> str:
3202    def regexpreplace_sql(self, expression: exp.RegexpReplace) -> str:
3203        subject = expression.this
3204        pattern = expression.expression
3205        replacement = expression.args.get("replacement") or exp.Literal.string("")
3206        position = expression.args.get("position")
3207        occurrence = expression.args.get("occurrence")
3208        modifiers = expression.args.get("modifiers")
3209
3210        validated_flags = self._validate_regexp_flags(modifiers, supported_flags="cimsg") or ""
3211
3212        # Handle occurrence (only literals supported)
3213        if occurrence and not occurrence.is_int:
3214            self.unsupported("REGEXP_REPLACE with non-literal occurrence")
3215        else:
3216            occurrence = occurrence.to_py() if occurrence and occurrence.is_int else 0
3217            if occurrence > 1:
3218                self.unsupported(f"REGEXP_REPLACE occurrence={occurrence} not supported")
3219            # flag duckdb to do either all or none, single_replace check is for duckdb round trip
3220            elif (
3221                occurrence == 0
3222                and "g" not in validated_flags
3223                and not expression.args.get("single_replace")
3224            ):
3225                validated_flags += "g"
3226
3227        # Handle position (only literals supported)
3228        prefix = None
3229        if position and not position.is_int:
3230            self.unsupported("REGEXP_REPLACE with non-literal position")
3231        elif position and position.is_int and position.to_py() > 1:
3232            pos = position.to_py()
3233            prefix = exp.Substring(
3234                this=subject, start=exp.Literal.number(1), length=exp.Literal.number(pos - 1)
3235            )
3236            subject = exp.Substring(this=subject, start=exp.Literal.number(pos))
3237
3238        result: exp.Expr = exp.Anonymous(
3239            this="REGEXP_REPLACE",
3240            expressions=[
3241                subject,
3242                pattern,
3243                replacement,
3244                exp.Literal.string(validated_flags) if validated_flags else None,
3245            ],
3246        )
3247
3248        if prefix:
3249            result = exp.Concat(expressions=[prefix, result])
3250
3251        return self.sql(result)
def regexplike_sql(self, expression: sqlglot.expressions.core.RegexpLike) -> str:
3253    def regexplike_sql(self, expression: exp.RegexpLike) -> str:
3254        this = expression.this
3255        pattern = expression.expression
3256        flag = expression.args.get("flag")
3257
3258        if expression.args.get("full_match"):
3259            validated_flags = self._validate_regexp_flags(flag, supported_flags="cims")
3260            flag = exp.Literal.string(validated_flags) if validated_flags else None
3261            return self.func("REGEXP_FULL_MATCH", this, pattern, flag)
3262
3263        return self.func("REGEXP_MATCHES", this, pattern, flag)
@unsupported_args('ins_cost', 'del_cost', 'sub_cost')
def levenshtein_sql(self, expression: sqlglot.expressions.string.Levenshtein) -> str:
3265    @unsupported_args("ins_cost", "del_cost", "sub_cost")
3266    def levenshtein_sql(self, expression: exp.Levenshtein) -> str:
3267        this = expression.this
3268        expr = expression.expression
3269        max_dist = expression.args.get("max_dist")
3270
3271        if max_dist is None:
3272            return self.func("LEVENSHTEIN", this, expr)
3273
3274        # Emulate Snowflake semantics: if distance > max_dist, return max_dist
3275        levenshtein = exp.Levenshtein(this=this, expression=expr)
3276        return self.sql(exp.Least(this=levenshtein, expressions=[max_dist]))
def pad_sql(self, expression: sqlglot.expressions.string.Pad) -> str:
3278    def pad_sql(self, expression: exp.Pad) -> str:
3279        """
3280        Handle RPAD/LPAD for VARCHAR and BINARY types.
3281
3282        For VARCHAR: Delegate to parent class
3283        For BINARY: Lower to: input || REPEAT(pad, GREATEST(0, target_len - OCTET_LENGTH(input)))
3284        """
3285        string_arg = expression.this
3286        fill_arg = expression.args.get("fill_pattern") or exp.Literal.string(" ")
3287
3288        if _is_binary(string_arg) or _is_binary(fill_arg):
3289            length_arg = expression.expression
3290            is_left = expression.args.get("is_left")
3291
3292            input_len = exp.ByteLength(this=string_arg)
3293            chars_needed = length_arg - input_len
3294            pad_count = exp.Greatest(
3295                this=exp.Literal.number(0), expressions=[chars_needed], ignore_nulls=True
3296            )
3297            repeat_expr = exp.Repeat(this=fill_arg, times=pad_count)
3298
3299            left, right = string_arg, repeat_expr
3300            if is_left:
3301                left, right = right, left
3302
3303            result = exp.DPipe(this=left, expression=right)
3304            return self.sql(result)
3305
3306        # For VARCHAR: Delegate to parent class (handles PAD_FILL_PATTERN_IS_REQUIRED)
3307        return super().pad_sql(expression)

Handle RPAD/LPAD for VARCHAR and BINARY types.

For VARCHAR: delegate to the parent class. For BINARY: lower to `input || REPEAT(pad, GREATEST(0, target_len - OCTET_LENGTH(input)))`.

def minhash_sql(self, expression: sqlglot.expressions.aggregate.Minhash) -> str:
3309    def minhash_sql(self, expression: exp.Minhash) -> str:
3310        k = expression.this
3311        exprs = expression.expressions
3312
3313        if len(exprs) != 1 or isinstance(exprs[0], exp.Star):
3314            self.unsupported(
3315                "MINHASH with multiple expressions or * requires manual query restructuring"
3316            )
3317            return self.func("MINHASH", k, *exprs)
3318
3319        expr = exprs[0]
3320        result = exp.replace_placeholders(self.MINHASH_TEMPLATE.copy(), expr=expr, k=k)
3321        return f"({self.sql(result)})"
def minhashcombine_sql(self, expression: sqlglot.expressions.aggregate.MinhashCombine) -> str:
3323    def minhashcombine_sql(self, expression: exp.MinhashCombine) -> str:
3324        expr = expression.this
3325        result = exp.replace_placeholders(self.MINHASH_COMBINE_TEMPLATE.copy(), expr=expr)
3326        return f"({self.sql(result)})"
def approximatesimilarity_sql( self, expression: sqlglot.expressions.aggregate.ApproximateSimilarity) -> str:
3328    def approximatesimilarity_sql(self, expression: exp.ApproximateSimilarity) -> str:
3329        expr = expression.this
3330        result = exp.replace_placeholders(self.APPROXIMATE_SIMILARITY_TEMPLATE.copy(), expr=expr)
3331        return f"({self.sql(result)})"
def arrayuniqueagg_sql(self, expression: sqlglot.expressions.aggregate.ArrayUniqueAgg) -> str:
3333    def arrayuniqueagg_sql(self, expression: exp.ArrayUniqueAgg) -> str:
3334        return self.sql(
3335            exp.Filter(
3336                this=exp.func("LIST", exp.Distinct(expressions=[expression.this])),
3337                expression=exp.Where(this=expression.this.copy().is_(exp.null()).not_()),
3338            )
3339        )
def arrayunionagg_sql(self, expression: sqlglot.expressions.aggregate.ArrayUnionAgg) -> str:
3341    def arrayunionagg_sql(self, expression: exp.ArrayUnionAgg) -> str:
3342        self.unsupported("ARRAY_UNION_AGG is not supported in DuckDB")
3343        return self.function_fallback_sql(expression)
def arraydistinct_sql(self, expression: sqlglot.expressions.array.ArrayDistinct) -> str:
3345    def arraydistinct_sql(self, expression: exp.ArrayDistinct) -> str:
3346        arr = expression.this
3347        func = self.func("LIST_DISTINCT", arr)
3348
3349        if expression.args.get("check_null"):
3350            add_null_to_array = exp.func(
3351                "LIST_APPEND", exp.func("LIST_DISTINCT", exp.ArrayCompact(this=arr)), exp.Null()
3352            )
3353            return self.sql(
3354                exp.If(
3355                    this=exp.NEQ(
3356                        this=exp.ArraySize(this=arr), expression=exp.func("LIST_COUNT", arr)
3357                    ),
3358                    true=add_null_to_array,
3359                    false=func,
3360                )
3361            )
3362
3363        return func
def arrayintersect_sql(self, expression: sqlglot.expressions.array.ArrayIntersect) -> str:
3365    def arrayintersect_sql(self, expression: exp.ArrayIntersect) -> str:
3366        if expression.args.get("is_multiset") and len(expression.expressions) == 2:
3367            return self._array_bag_sql(
3368                self.ARRAY_INTERSECTION_CONDITION,
3369                expression.expressions[0],
3370                expression.expressions[1],
3371            )
3372        return self.function_fallback_sql(expression)
def arrayexcept_sql(self, expression: sqlglot.expressions.array.ArrayExcept) -> str:
3374    def arrayexcept_sql(self, expression: exp.ArrayExcept) -> str:
3375        arr1, arr2 = expression.this, expression.expression
3376        if expression.args.get("is_multiset"):
3377            return self._array_bag_sql(self.ARRAY_EXCEPT_CONDITION, arr1, arr2)
3378        return self.sql(
3379            exp.replace_placeholders(self.ARRAY_EXCEPT_SET_TEMPLATE, arr1=arr1, arr2=arr2)
3380        )
def arrayslice_sql(self, expression: sqlglot.expressions.array.ArraySlice) -> str:
3382    def arrayslice_sql(self, expression: exp.ArraySlice) -> str:
3383        """
3384        Transpiles Snowflake's ARRAY_SLICE (0-indexed, exclusive end) to DuckDB's
3385        ARRAY_SLICE (1-indexed, inclusive end) by wrapping start and end in CASE
3386        expressions that adjust the index at query time:
3387          - start: CASE WHEN start >= 0 THEN start + 1 ELSE start END
3388          - end:   CASE WHEN end < 0 THEN end - 1 ELSE end END
3389        """
3390        start, end = expression.args.get("start"), expression.args.get("end")
3391
3392        if expression.args.get("zero_based"):
3393            if start is not None:
3394                start = (
3395                    exp.case()
3396                    .when(
3397                        exp.GTE(this=start.copy(), expression=exp.Literal.number(0)),
3398                        exp.Add(this=start.copy(), expression=exp.Literal.number(1)),
3399                    )
3400                    .else_(start)
3401                )
3402            if end is not None:
3403                end = (
3404                    exp.case()
3405                    .when(
3406                        exp.LT(this=end.copy(), expression=exp.Literal.number(0)),
3407                        exp.Sub(this=end.copy(), expression=exp.Literal.number(1)),
3408                    )
3409                    .else_(end)
3410                )
3411
3412        return self.func("ARRAY_SLICE", expression.this, start, end, expression.args.get("step"))

Transpiles Snowflake's ARRAY_SLICE (0-indexed, exclusive end) to DuckDB's ARRAY_SLICE (1-indexed, inclusive end) by wrapping start and end in CASE expressions that adjust the index at query time:

  • start: CASE WHEN start >= 0 THEN start + 1 ELSE start END
  • end: CASE WHEN end < 0 THEN end - 1 ELSE end END
def arrayszip_sql(self, expression: sqlglot.expressions.array.ArraysZip) -> str:
3414    def arrayszip_sql(self, expression: exp.ArraysZip) -> str:
3415        args = expression.expressions
3416
3417        if not args:
3418            # Return [{}] - using MAP([], []) since DuckDB can't represent empty structs
3419            return self.sql(exp.array(exp.Map(keys=exp.array(), values=exp.array())))
3420
3421        # Build placeholder values for template
3422        lengths = [exp.Length(this=arg) for arg in args]
3423        max_len = (
3424            lengths[0]
3425            if len(lengths) == 1
3426            else exp.Greatest(this=lengths[0], expressions=lengths[1:])
3427        )
3428
3429        # Empty struct with same schema: {'$1': NULL, '$2': NULL, ...}
3430        empty_struct = exp.func(
3431            "STRUCT",
3432            *[
3433                exp.PropertyEQ(this=exp.Literal.string(f"${i + 1}"), expression=exp.Null())
3434                for i in range(len(args))
3435            ],
3436        )
3437
3438        # Struct for transform: {'$1': COALESCE(arr1, [])[__i + 1], ...}
3439        # COALESCE wrapping handles NULL arrays - prevents invalid NULL[i] syntax
3440        index = exp.column("__i") + 1
3441        transform_struct = exp.func(
3442            "STRUCT",
3443            *[
3444                exp.PropertyEQ(
3445                    this=exp.Literal.string(f"${i + 1}"),
3446                    expression=exp.func("COALESCE", arg, exp.array())[index],
3447                )
3448                for i, arg in enumerate(args)
3449            ],
3450        )
3451
3452        result = exp.replace_placeholders(
3453            self.ARRAYS_ZIP_TEMPLATE.copy(),
3454            null_check=exp.or_(*[arg.is_(exp.Null()) for arg in args]),
3455            all_empty_check=exp.and_(
3456                *[
3457                    exp.EQ(this=exp.Length(this=arg), expression=exp.Literal.number(0))
3458                    for arg in args
3459                ]
3460            ),
3461            empty_struct=empty_struct,
3462            max_len=max_len,
3463            transform_struct=transform_struct,
3464        )
3465        return self.sql(result)
def lower_sql(self, expression: sqlglot.expressions.string.Lower) -> str:
3467    def lower_sql(self, expression: exp.Lower) -> str:
3468        result_sql = self.func("LOWER", _cast_to_varchar(expression.this))
3469        return _gen_with_cast_to_blob(self, expression, result_sql)
def upper_sql(self, expression: sqlglot.expressions.string.Upper) -> str:
3471    def upper_sql(self, expression: exp.Upper) -> str:
3472        result_sql = self.func("UPPER", _cast_to_varchar(expression.this))
3473        return _gen_with_cast_to_blob(self, expression, result_sql)
def reverse_sql(self, expression: sqlglot.expressions.string.Reverse) -> str:
3475    def reverse_sql(self, expression: exp.Reverse) -> str:
3476        result_sql = self.func("REVERSE", _cast_to_varchar(expression.this))
3477        return _gen_with_cast_to_blob(self, expression, result_sql)
def left_sql(self, expression: sqlglot.expressions.string.Left) -> str:
3503    def left_sql(self, expression: exp.Left) -> str:
3504        return self._left_right_sql(expression, "LEFT")
def right_sql(self, expression: sqlglot.expressions.string.Right) -> str:
3506    def right_sql(self, expression: exp.Right) -> str:
3507        return self._left_right_sql(expression, "RIGHT")
def rtrimmedlength_sql(self, expression: sqlglot.expressions.string.RtrimmedLength) -> str:
3509    def rtrimmedlength_sql(self, expression: exp.RtrimmedLength) -> str:
3510        return self.func("LENGTH", exp.Trim(this=expression.this, position="TRAILING"))
def stuff_sql(self, expression: sqlglot.expressions.string.Stuff) -> str:
3512    def stuff_sql(self, expression: exp.Stuff) -> str:
3513        base = expression.this
3514        start = expression.args["start"]
3515        length = expression.args["length"]
3516        insertion = expression.expression
3517        is_binary = _is_binary(base)
3518
3519        if is_binary:
3520            # DuckDB's SUBSTRING doesn't accept BLOB; operate on the HEX string instead
3521            # (each byte = 2 hex chars), then UNHEX back to BLOB
3522            base = exp.Hex(this=base)
3523            insertion = exp.Hex(this=insertion)
3524            left = exp.Substring(
3525                this=base.copy(),
3526                start=exp.Literal.number(1),
3527                length=(start.copy() - exp.Literal.number(1)) * exp.Literal.number(2),
3528            )
3529            right = exp.Substring(
3530                this=base.copy(),
3531                start=((start + length) - exp.Literal.number(1)) * exp.Literal.number(2)
3532                + exp.Literal.number(1),
3533            )
3534        else:
3535            left = exp.Substring(
3536                this=base.copy(),
3537                start=exp.Literal.number(1),
3538                length=start.copy() - exp.Literal.number(1),
3539            )
3540            right = exp.Substring(this=base.copy(), start=start + length)
3541        result: exp.Expr = exp.DPipe(
3542            this=exp.DPipe(this=left, expression=insertion), expression=right
3543        )
3544
3545        if is_binary:
3546            result = exp.Unhex(this=result)
3547
3548        return self.sql(result)
def rand_sql(self, expression: sqlglot.expressions.functions.Rand) -> str:
3550    def rand_sql(self, expression: exp.Rand) -> str:
3551        seed = expression.this
3552        if seed is not None:
3553            self.unsupported("RANDOM with seed is not supported in DuckDB")
3554
3555        lower = expression.args.get("lower")
3556        upper = expression.args.get("upper")
3557
3558        if lower and upper:
3559            # scale DuckDB's [0,1) to the specified range
3560            range_size = exp.paren(upper - lower)
3561            scaled = exp.Add(this=lower, expression=exp.func("random") * range_size)
3562
3563            # For now we assume that if bounds are set, return type is BIGINT. Snowflake/Teradata
3564            result = exp.cast(scaled, exp.DType.BIGINT)
3565            return self.sql(result)
3566
3567        # Default DuckDB behavior - just return RANDOM() as float
3568        return "RANDOM()"
def bytelength_sql(self, expression: sqlglot.expressions.string.ByteLength) -> str:
3570    def bytelength_sql(self, expression: exp.ByteLength) -> str:
3571        arg = expression.this
3572
3573        # Check if it's a text type (handles both literals and annotated expressions)
3574        if arg.is_type(*exp.DataType.TEXT_TYPES):
3575            return self.func("OCTET_LENGTH", exp.Encode(this=arg))
3576
3577        # Default: pass through as-is (conservative for DuckDB, handles binary and unannotated)
3578        return self.func("OCTET_LENGTH", arg)
def base64encode_sql(self, expression: sqlglot.expressions.string.Base64Encode) -> str:
3580    def base64encode_sql(self, expression: exp.Base64Encode) -> str:
3581        # DuckDB TO_BASE64 requires BLOB input
3582        # Snowflake BASE64_ENCODE accepts both VARCHAR and BINARY - for VARCHAR it implicitly
3583        # encodes UTF-8 bytes. We add ENCODE unless the input is a binary type.
3584        result = expression.this
3585
3586        # Check if input is a string type - ENCODE only accepts VARCHAR
3587        if result.is_type(*exp.DataType.TEXT_TYPES):
3588            result = exp.Encode(this=result)
3589
3590        result = exp.ToBase64(this=result)
3591
3592        max_line_length = expression.args.get("max_line_length")
3593        alphabet = expression.args.get("alphabet")
3594
3595        # Handle custom alphabet by replacing standard chars with custom ones
3596        result = _apply_base64_alphabet_replacements(result, alphabet)
3597
3598        # Handle max_line_length by inserting newlines every N characters
3599        line_length = (
3600            t.cast(int, max_line_length.to_py())
3601            if isinstance(max_line_length, exp.Literal) and max_line_length.is_number
3602            else 0
3603        )
3604        if line_length > 0:
3605            newline = exp.Chr(expressions=[exp.Literal.number(10)])
3606            result = exp.Trim(
3607                this=exp.RegexpReplace(
3608                    this=result,
3609                    expression=exp.Literal.string(f"(.{{{line_length}}})"),
3610                    replacement=exp.Concat(expressions=[exp.Literal.string("\\1"), newline.copy()]),
3611                ),
3612                expression=newline,
3613                position="TRAILING",
3614            )
3615
3616        return self.sql(result)
def hex_sql(self, expression: sqlglot.expressions.string.Hex) -> str:
3618    def hex_sql(self, expression: exp.Hex) -> str:
3619        case = expression.args.get("case")
3620
3621        if not case:
3622            return self.func("HEX", expression.this)
3623
3624        hex_expr = exp.Hex(this=expression.this)
3625        return self.sql(
3626            exp.case()
3627            .when(case.is_(exp.null()), exp.null())
3628            .when(case.copy().eq(0), exp.Lower(this=hex_expr.copy()))
3629            .else_(hex_expr)
3630        )
def replace_sql(self, expression: sqlglot.expressions.string.Replace) -> str:
3632    def replace_sql(self, expression: exp.Replace) -> str:
3633        result_sql = self.func(
3634            "REPLACE",
3635            _cast_to_varchar(expression.this),
3636            _cast_to_varchar(expression.expression),
3637            _cast_to_varchar(expression.args.get("replacement")),
3638        )
3639        return _gen_with_cast_to_blob(self, expression, result_sql)
def bitwisexor_sql(self, expression: sqlglot.expressions.core.BitwiseXor) -> str:
3646    def bitwisexor_sql(self, expression: exp.BitwiseXor) -> str:
3647        _prepare_binary_bitwise_args(expression)
3648        result_sql = self.func("XOR", expression.this, expression.expression)
3649        return _gen_with_cast_to_blob(self, expression, result_sql)
def objectinsert_sql(self, expression: sqlglot.expressions.json.ObjectInsert) -> str:
3651    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
3652        this = expression.this
3653        key = expression.args.get("key")
3654        key_sql = key.name if isinstance(key, exp.Expr) else ""
3655        value_sql = self.sql(expression, "value")
3656
3657        kv_sql = f"{key_sql} := {value_sql}"
3658
3659        # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
3660        # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
3661        if isinstance(this, exp.Struct) and not this.expressions:
3662            return self.func("STRUCT_PACK", kv_sql)
3663
3664        return self.func("STRUCT_INSERT", this, kv_sql)
def mapcat_sql(self, expression: sqlglot.expressions.array.MapCat) -> str:
3666    def mapcat_sql(self, expression: exp.MapCat) -> str:
3667        result = exp.replace_placeholders(
3668            self.MAPCAT_TEMPLATE.copy(),
3669            map1=expression.this,
3670            map2=expression.expression,
3671        )
3672        return self.sql(result)
def mapcontainskey_sql(self, expression: sqlglot.expressions.array.MapContainsKey) -> str:
3674    def mapcontainskey_sql(self, expression: exp.MapContainsKey) -> str:
3675        return self.func(
3676            "ARRAY_CONTAINS", exp.func("MAP_KEYS", expression.args["key"]), expression.this
3677        )
def mapdelete_sql(self, expression: sqlglot.expressions.array.MapDelete) -> str:
3679    def mapdelete_sql(self, expression: exp.MapDelete) -> str:
3680        map_arg = expression.this
3681        keys_to_delete = expression.expressions
3682
3683        x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key"))
3684
3685        lambda_expr = exp.Lambda(
3686            this=exp.In(this=x_dot_key, expressions=keys_to_delete).not_(),
3687            expressions=[exp.to_identifier("x")],
3688        )
3689        result = exp.func(
3690            "MAP_FROM_ENTRIES",
3691            exp.ArrayFilter(this=exp.func("MAP_ENTRIES", map_arg), expression=lambda_expr),
3692        )
3693        return self.sql(result)
def mappick_sql(self, expression: sqlglot.expressions.array.MapPick) -> str:
3695    def mappick_sql(self, expression: exp.MapPick) -> str:
3696        map_arg = expression.this
3697        keys_to_pick = expression.expressions
3698
3699        x_dot_key = exp.Dot(this=exp.to_identifier("x"), expression=exp.to_identifier("key"))
3700
3701        if len(keys_to_pick) == 1 and keys_to_pick[0].is_type(exp.DType.ARRAY):
3702            lambda_expr = exp.Lambda(
3703                this=exp.func("ARRAY_CONTAINS", keys_to_pick[0], x_dot_key),
3704                expressions=[exp.to_identifier("x")],
3705            )
3706        else:
3707            lambda_expr = exp.Lambda(
3708                this=exp.In(this=x_dot_key, expressions=keys_to_pick),
3709                expressions=[exp.to_identifier("x")],
3710            )
3711
3712        result = exp.func(
3713            "MAP_FROM_ENTRIES",
3714            exp.func("LIST_FILTER", exp.func("MAP_ENTRIES", map_arg), lambda_expr),
3715        )
3716        return self.sql(result)
def mapsize_sql(self, expression: sqlglot.expressions.array.MapSize) -> str:
3718    def mapsize_sql(self, expression: exp.MapSize) -> str:
3719        return self.func("CARDINALITY", expression.this)
@unsupported_args('update_flag')
def mapinsert_sql(self, expression: sqlglot.expressions.array.MapInsert) -> str:
3721    @unsupported_args("update_flag")
3722    def mapinsert_sql(self, expression: exp.MapInsert) -> str:
3723        map_arg = expression.this
3724        key = expression.args.get("key")
3725        value = expression.args.get("value")
3726
3727        map_type = map_arg.type
3728
3729        if value is not None:
3730            if map_type and map_type.expressions and len(map_type.expressions) > 1:
3731                # Extract the value type from MAP(key_type, value_type)
3732                value_type = map_type.expressions[1]
3733                # Cast value to match the map's value type to avoid type conflicts
3734                value = exp.cast(value, value_type)
3735            # else: polymorphic MAP case - no type parameters available, use value as-is
3736
3737        # Create a single-entry map for the new key-value pair
3738        new_entry_struct = exp.Struct(expressions=[exp.PropertyEQ(this=key, expression=value)])
3739        new_entry: exp.Expression = exp.ToMap(this=new_entry_struct)
3740
3741        # Use MAP_CONCAT to merge the original map with the new entry
3742        # This automatically handles both insert and update cases
3743        result = exp.func("MAP_CONCAT", map_arg, new_entry)
3744
3745        return self.sql(result)
def startswith_sql(self, expression: sqlglot.expressions.string.StartsWith) -> str:
3747    def startswith_sql(self, expression: exp.StartsWith) -> str:
3748        return self.func(
3749            "STARTS_WITH",
3750            _cast_to_varchar(expression.this),
3751            _cast_to_varchar(expression.expression),
3752        )
def space_sql(self, expression: sqlglot.expressions.string.Space) -> str:
3754    def space_sql(self, expression: exp.Space) -> str:
3755        # DuckDB's REPEAT requires BIGINT for the count parameter
3756        return self.sql(
3757            exp.Repeat(
3758                this=exp.Literal.string(" "),
3759                times=exp.cast(expression.this, exp.DType.BIGINT),
3760            )
3761        )
def tablefromrows_sql(self, expression: sqlglot.expressions.query.TableFromRows) -> str:
3763    def tablefromrows_sql(self, expression: exp.TableFromRows) -> str:
3764        # For GENERATOR, unwrap TABLE() - just emit the Generator (becomes RANGE)
3765        if isinstance(expression.this, exp.Generator):
3766            # Preserve alias, joins, and other table-level args
3767            table = exp.Table(
3768                this=expression.this,
3769                alias=expression.args.get("alias"),
3770                joins=expression.args.get("joins"),
3771            )
3772            return self.sql(table)
3773
3774        return super().tablefromrows_sql(expression)
def unnest_sql(self, expression: sqlglot.expressions.array.Unnest) -> str:
3776    def unnest_sql(self, expression: exp.Unnest) -> str:
3777        explode_array = expression.args.get("explode_array")
3778        if explode_array:
3779            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
3780            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
3781            expression.expressions.append(
3782                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
3783            )
3784
3785            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
3786            alias = expression.args.get("alias")
3787            if isinstance(alias, exp.TableAlias):
3788                expression.set("alias", None)
3789                if alias.columns:
3790                    alias = exp.TableAlias(this=seq_get(alias.columns, 0))
3791
3792            unnest_sql = super().unnest_sql(expression)
3793            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
3794            return self.sql(select)
3795
3796        return super().unnest_sql(expression)
def ignorenulls_sql(self, expression: sqlglot.expressions.core.IgnoreNulls) -> str:
3798    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
3799        this = expression.this
3800
3801        if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
3802            # DuckDB should render IGNORE NULLS only for the general-purpose
3803            # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
3804            return super().ignorenulls_sql(expression)
3805
3806        if isinstance(this, exp.First):
3807            this = exp.AnyValue(this=this.this)
3808
3809        if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)):
3810            self.unsupported("IGNORE NULLS is not supported for non-window functions.")
3811
3812        return self.sql(this)
def split_sql(self, expression: sqlglot.expressions.string.Split) -> str:
3814    def split_sql(self, expression: exp.Split) -> str:
3815        base_func = exp.func("STR_SPLIT", expression.this, expression.expression)
3816
3817        case_expr = exp.case().else_(base_func)
3818        needs_case = False
3819
3820        if expression.args.get("null_returns_null"):
3821            case_expr = case_expr.when(expression.expression.is_(exp.null()), exp.null())
3822            needs_case = True
3823
3824        if expression.args.get("empty_delimiter_returns_whole"):
3825            # When delimiter is empty string, return input string as single array element
3826            array_with_input = exp.array(expression.this)
3827            case_expr = case_expr.when(
3828                expression.expression.eq(exp.Literal.string("")), array_with_input
3829            )
3830            needs_case = True
3831
3832        return self.sql(case_expr if needs_case else base_func)
def splitpart_sql(self, expression: exp.SplitPart) -> str:
    """Generate SPLIT_PART, emulating Snowflake-specific edge cases.

    - `part_index_zero_as_one`: index 0 is treated the same as index 1.
    - `empty_delimiter_returns_whole`: with an empty delimiter, the whole
      string is returned for index 1 or -1, otherwise ''.
    Falls back to a plain function call when delimiter or part index is missing.
    """
    string_arg = expression.this
    delimiter_arg = expression.args.get("delimiter")
    part_index_arg = expression.args.get("part_index")

    if delimiter_arg and part_index_arg:
        # Handle Snowflake's "index 0 and 1 both return first element" behavior
        if expression.args.get("part_index_zero_as_one"):
            # Convert 0 to 1 for compatibility

            part_index_arg = exp.Paren(
                this=exp.case()
                .when(part_index_arg.eq(exp.Literal.number("0")), exp.Literal.number("1"))
                .else_(part_index_arg)
            )

        # Use Anonymous to avoid recursion
        base_func_expr: exp.Expr = exp.Anonymous(
            this="SPLIT_PART", expressions=[string_arg, delimiter_arg, part_index_arg]
        )
        needs_case_transform = False
        case_expr = exp.case().else_(base_func_expr)

        if expression.args.get("empty_delimiter_returns_whole"):
            # When delimiter is empty string:
            # - Return whole string if part_index is 1 or -1
            # - Return empty string otherwise
            empty_case = exp.Paren(
                this=exp.case()
                .when(
                    exp.or_(
                        part_index_arg.eq(exp.Literal.number("1")),
                        part_index_arg.eq(exp.Literal.number("-1")),
                    ),
                    string_arg,
                )
                .else_(exp.Literal.string(""))
            )

            case_expr = case_expr.when(delimiter_arg.eq(exp.Literal.string("")), empty_case)
            needs_case_transform = True

        """
        Output looks something like this:

        CASE
        WHEN delimiter is '' THEN
            (
                CASE
                WHEN adjusted_part_index = 1 OR adjusted_part_index = -1 THEN input
                ELSE '' END
            )
        ELSE SPLIT_PART(input, delimiter, adjusted_part_index)
        END

        """
        return self.sql(case_expr if needs_case_transform else base_func_expr)

    return self.function_fallback_sql(expression)
def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
    """Render RESPECT NULLS only on the window functions DuckDB accepts it for."""
    func = expression.this
    if not isinstance(func, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # Anything else: warn and emit the wrapped function without the modifier
        self.unsupported("RESPECT NULLS is not supported for non-window functions.")
        return self.sql(expression, "this")

    # General-purpose window functions that accept it
    # e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
    return super().respectnulls_sql(expression)
def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
    """Generate ARRAY_TO_STRING, emulating NULL-element handling flags.

    - `null_is_empty`: NULL elements become '' via LIST_TRANSFORM + COALESCE;
      with `null_delim_is_null` additionally set, a NULL delimiter yields NULL.
    - `null` (replacement value): NULL elements are COALESCEd to that value.
    Otherwise a plain ARRAY_TO_STRING call is emitted.
    """
    null = expression.args.get("null")

    if expression.args.get("null_is_empty"):
        # x -> COALESCE(CAST(x AS TEXT), '') replaces NULL elements with ''
        x = exp.to_identifier("x")
        list_transform = exp.Transform(
            this=expression.this.copy(),
            expression=exp.Lambda(
                this=exp.Coalesce(
                    this=exp.cast(x, "TEXT"), expressions=[exp.Literal.string("")]
                ),
                expressions=[x],
            ),
        )
        array_to_string = exp.ArrayToString(
            this=list_transform, expression=expression.expression
        )
        if expression.args.get("null_delim_is_null"):
            # NULL delimiter propagates: CASE WHEN delim IS NULL THEN NULL ELSE ... END
            return self.sql(
                exp.case()
                .when(expression.expression.copy().is_(exp.null()), exp.null())
                .else_(array_to_string)
            )
        return self.sql(array_to_string)

    if null:
        # Substitute the provided replacement for NULL elements before joining
        x = exp.to_identifier("x")
        return self.sql(
            exp.ArrayToString(
                this=exp.Transform(
                    this=expression.this,
                    expression=exp.Lambda(
                        this=exp.Coalesce(this=x, expressions=[null]),
                        expressions=[x],
                    ),
                ),
                expression=expression.expression,
            )
        )

    return self.func("ARRAY_TO_STRING", expression.this, expression.expression)
def concatws_sql(self, expression: exp.ConcatWs) -> str:
    """Generate CONCAT_WS, special-casing binary operands.

    DuckDB-specific: when any operand is binary, CONCAT_WS cannot be used, so
    the arguments are chained with the || (DPipe) operator, interleaving the
    separator between each pair.
    """
    # First expression is the separator, the rest are the values to join
    separator = seq_get(expression.expressions, 0)
    args = expression.expressions[1:]

    if any(_is_binary(arg) for arg in [separator, *args]):
        # NOTE(review): assumes at least one value after the separator — an
        # empty args list would raise IndexError here; confirm parser guarantees this
        result = args[0]
        for arg in args[1:]:
            result = exp.DPipe(
                this=exp.DPipe(this=result, expression=separator), expression=arg
            )
        return self.sql(result)

    return super().concatws_sql(expression)
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    """Generate REGEXP_EXTRACT via the shared regexp-extract helper."""
    return self._regexp_extract_sql(expression)
def regexpextractall_sql(self, expression: exp.RegexpExtractAll) -> str:
    """Generate REGEXP_EXTRACT_ALL via the shared regexp-extract helper."""
    return self._regexp_extract_sql(expression)
def regexpinstr_sql(self, expression: exp.RegexpInstr) -> str:
    """Emulate REGEXP_INSTR (match position) with DuckDB list functions.

    The 1-based position of the Nth match is reconstructed as:
        1 + sum(lengths of split segments before the Nth match)
          + sum(lengths of the first N-1 matches)
          + starting-position offset
    With option=1 the length of the Nth match itself is added, giving the
    position just past the match end. NULL arguments yield NULL; an empty
    pattern or too few matches yield 0.
    """
    this = expression.this
    pattern = expression.expression
    position = expression.args.get("position")
    orig_occ = expression.args.get("occurrence")
    # Default occurrence is the first match
    occurrence = orig_occ or exp.Literal.number(1)
    option = expression.args.get("option")
    parameters = expression.args.get("parameters")

    # Only the i/m/s flags are portable; prepend them as an inline (?ims) group
    validated_flags = self._validate_regexp_flags(parameters, supported_flags="ims")
    if validated_flags:
        pattern = exp.Concat(expressions=[exp.Literal.string(f"(?{validated_flags})"), pattern])

    # Handle starting position offset
    pos_offset: exp.Expr = exp.Literal.number(0)
    if position and (not position.is_int or position.to_py() > 1):
        # Search in the substring starting at `position`, then add the offset back
        this = exp.Substring(this=this, start=position)
        pos_offset = position - exp.Literal.number(1)

    # Helper: LIST_SUM(LIST_TRANSFORM(list[1:end], x -> LENGTH(x)))
    def sum_lengths(func_name: str, end: exp.Expr) -> exp.Expr:
        lst = exp.Bracket(
            this=exp.Anonymous(this=func_name, expressions=[this, pattern]),
            expressions=[exp.Slice(this=exp.Literal.number(1), expression=end)],
            offset=1,
        )
        transform = exp.Anonymous(
            this="LIST_TRANSFORM",
            expressions=[
                lst,
                exp.Lambda(
                    this=exp.Length(this=exp.to_identifier("x")),
                    expressions=[exp.to_identifier("x")],
                ),
            ],
        )
        # COALESCE(..., 0) because LIST_SUM of an empty slice is NULL
        return exp.Coalesce(
            this=exp.Anonymous(this="LIST_SUM", expressions=[transform]),
            expressions=[exp.Literal.number(0)],
        )

    # Position = 1 + sum(split_lengths[1:occ]) + sum(match_lengths[1:occ-1]) + offset
    base_pos: exp.Expr = (
        exp.Literal.number(1)
        + sum_lengths("STRING_SPLIT_REGEX", occurrence)
        + sum_lengths("REGEXP_EXTRACT_ALL", occurrence - exp.Literal.number(1))
        + pos_offset
    )

    # option=1: add match length for end position
    if option and option.is_int and option.to_py() == 1:
        match_at_occ = exp.Bracket(
            this=exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern]),
            expressions=[occurrence],
            offset=1,
        )
        base_pos = base_pos + exp.Coalesce(
            this=exp.Length(this=match_at_occ), expressions=[exp.Literal.number(0)]
        )

    # NULL checks for all provided arguments
    # .copy() is used strictly because .is_() alters the node's parent pointer, mutating the parsed AST
    null_args = [
        expression.this,
        expression.expression,
        position,
        orig_occ,
        option,
        parameters,
    ]
    null_checks = [arg.copy().is_(exp.Null()) for arg in null_args if arg]

    matches = exp.Anonymous(this="REGEXP_EXTRACT_ALL", expressions=[this, pattern])

    return self.sql(
        exp.case()
        .when(exp.or_(*null_checks), exp.Null())
        .when(pattern.copy().eq(exp.Literal.string("")), exp.Literal.number(0))
        .when(exp.Length(this=matches) < occurrence, exp.Literal.number(0))
        .else_(base_pos)
    )
@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    """Transpile numeric-to-string formatting through DuckDB's FORMAT.

    Only integer precision formats are handled; anything else falls back to
    a plain function call after emitting an "unsupported" warning.
    """
    fmt = expression.args.get("format")
    if not (fmt and fmt.is_int):
        self.unsupported("Only integer formats are supported by NumberToStr")
        return self.function_fallback_sql(expression)

    # '{:,.Nf}' renders thousands separators with N decimal places
    template = f"'{{:,.{fmt.name}f}}'"
    return self.func("FORMAT", template, expression.this)
def autoincrementcolumnconstraint_sql(self, _) -> str:
    """AUTOINCREMENT has no DuckDB equivalent: warn and emit nothing."""
    self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
    return ""
def aliases_sql(self, expression: exp.Aliases) -> str:
    """Route column-aliased POSEXPLODE through its dedicated generator."""
    inner = expression.this
    return (
        self.posexplode_sql(inner)
        if isinstance(inner, exp.Posexplode)
        else super().aliases_sql(expression)
    )
def posexplode_sql(self, expression: exp.Posexplode) -> str:
    """Transpile Spark's POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS.

    Alias names are taken from the surrounding Aliases node or table alias
    when present, defaulting to Spark's "pos" and "col". When used as a
    table source (FROM POSEXPLODE(...)), the pair is wrapped in a subquery.
    """
    this = expression.this
    parent = expression.parent

    # The default Spark aliases are "pos" and "col", unless specified otherwise
    pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

    if isinstance(parent, exp.Aliases):
        # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
        pos, col = parent.expressions
    elif isinstance(parent, exp.Table):
        # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
        alias = parent.args.get("alias")
        if alias:
            pos, col = alias.columns or [pos, col]
            # The alias is consumed here, so remove it from the table node
            alias.pop()

    # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
    # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
    unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
    gen_subscripts = self.sql(
        exp.Alias(
            this=exp.Anonymous(
                this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
            )
            - exp.Literal.number(1),
            alias=pos,
        )
    )

    posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

    if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
        # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
        return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

    return posexplode_sql
def addmonths_sql(self, expression: exp.AddMonths) -> str:
    """
    Handles three key issues:
    1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
    2. End-of-month preservation: If input is last day of month, result is last day of result month
    3. Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
    """
    # Local import to avoid a circular dependency with the optimizer package
    from sqlglot.optimizer.annotate_types import annotate_types

    this = expression.this
    if not this.type:
        this = annotate_types(this, dialect=self.dialect)

    # Text inputs must be cast so the date arithmetic below is well-typed
    if this.is_type(*exp.DataType.TEXT_TYPES):
        this = exp.Cast(this=this, to=exp.DataType(this=exp.DType.TIMESTAMP))

    # Detect float/decimal months to apply rounding (Snowflake behavior)
    # DuckDB INTERVAL syntax doesn't support non-integer expressions, so use TO_MONTHS
    months_expr = expression.expression
    if not months_expr.type:
        months_expr = annotate_types(months_expr, dialect=self.dialect)

    # Build interval or to_months expression based on type
    # Float/decimal case: Round and use TO_MONTHS(CAST(ROUND(value) AS INT))
    interval_or_to_months = (
        exp.func("TO_MONTHS", exp.cast(exp.func("ROUND", months_expr), "INT"))
        if months_expr.is_type(
            exp.DType.FLOAT,
            exp.DType.DOUBLE,
            exp.DType.DECIMAL,
        )
        # Integer case: standard INTERVAL N MONTH syntax
        else exp.Interval(this=months_expr, unit=exp.var("MONTH"))
    )

    date_add_expr = exp.Add(this=this, expression=interval_or_to_months)

    # Apply end-of-month preservation if Snowflake flag is set
    # CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(result) ELSE result END
    preserve_eom = expression.args.get("preserve_end_of_month")
    result_expr = (
        exp.case()
        .when(
            exp.EQ(this=exp.func("LAST_DAY", this), expression=this),
            exp.func("LAST_DAY", date_add_expr),
        )
        .else_(date_add_expr)
        if preserve_eom
        else date_add_expr
    )

    # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
    # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type)
    # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
    # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
    if this.is_type(exp.DType.DATE, exp.DType.TIMESTAMPTZ):
        return self.sql(exp.Cast(this=result_expr, to=this.type))
    return self.sql(result_expr)

Handles three key issues:

  1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
  2. End-of-month preservation: If input is last day of month, result is last day of result month
  3. Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
def format_sql(self, expression: exp.Format) -> str:
    """Map a single-argument '%s' FORMAT to DuckDB's '{}' placeholder syntax."""
    is_single_percent_s = (
        expression.name.lower() == "%s" and len(expression.expressions) == 1
    )
    if not is_single_percent_s:
        # Anything more complex is emitted as a plain function call
        return self.function_fallback_sql(expression)

    return self.func("FORMAT", "'{}'", expression.expressions[0])
def hexstring_sql(
    self, expression: exp.HexString, binary_function_repr: str | None = None
) -> str:
    """Generate a hex string literal, forcing UNHEX for the binary form.

    The `binary_function_repr` parameter is accepted for interface
    compatibility but always overridden with "UNHEX".
    """
    # UNHEX('FF') correctly produces blob \xFF in DuckDB
    return super().hexstring_sql(expression, binary_function_repr="UNHEX")
def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
    """Generate DATE_TRUNC, with custom week-start handling and type preservation.

    Units that imply a non-default week start are routed through the
    week-truncation helper. With `input_type_preserved`, the result is cast
    back to the input's temporal type (unless truncating a DATE by a date unit,
    where the cast would be a no-op).
    """
    unit = expression.args.get("unit")
    date = expression.this

    # Non-None when the unit encodes a specific first-day-of-week
    week_start = _week_unit_to_dow(unit)
    unit = unit_to_str(expression)

    if week_start:
        result = self.sql(
            _build_week_trunc_expression(date, week_start, preserve_start_day=True)
        )
    else:
        result = self.func("DATE_TRUNC", unit, date)

    if (
        expression.args.get("input_type_preserved")
        and date.is_type(*exp.DataType.TEMPORAL_TYPES)
        and not (is_date_unit(unit) and date.is_type(exp.DType.DATE))
    ):
        return self.sql(exp.Cast(this=result, to=date.type))

    return result
def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
    """Generate DATE_TRUNC for timestamps, handling timezones and TIME inputs.

    A date-granularity unit with a zone uses double AT TIME ZONE for BigQuery
    compatibility. With `input_type_preserved`, TIME/TIMETZ inputs are lifted
    onto a dummy date before truncation, and other temporal inputs are cast
    back to their original type.
    """
    unit = unit_to_str(expression)
    zone = expression.args.get("zone")
    timestamp = expression.this
    date_unit = is_date_unit(unit)

    if date_unit and zone:
        # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC.
        # Double AT TIME ZONE needed for BigQuery compatibility:
        # 1. First AT TIME ZONE: ensures truncation happens in the target timezone
        # 2. Second AT TIME ZONE: converts the DATE result back to TIMESTAMPTZ (preserving time component)
        timestamp = exp.AtTimeZone(this=timestamp, zone=zone)
        result_sql = self.func("DATE_TRUNC", unit, timestamp)
        return self.sql(exp.AtTimeZone(this=result_sql, zone=zone))

    result = self.func("DATE_TRUNC", unit, timestamp)
    if expression.args.get("input_type_preserved"):
        if timestamp.type and timestamp.is_type(exp.DType.TIME, exp.DType.TIMETZ):
            # DATE_TRUNC doesn't accept TIME directly: add it to a fixed date,
            # truncate, then cast back to the original TIME type
            dummy_date = exp.Cast(
                this=exp.Literal.string("1970-01-01"),
                to=exp.DataType(this=exp.DType.DATE),
            )
            date_time = exp.Add(this=dummy_date, expression=timestamp)
            result = self.func("DATE_TRUNC", unit, date_time)
            return self.sql(exp.Cast(this=result, to=timestamp.type))

        # Cast back unless truncating a DATE by a date unit (already the right type)
        if timestamp.is_type(*exp.DataType.TEMPORAL_TYPES) and not (
            date_unit and timestamp.is_type(exp.DType.DATE)
        ):
            return self.sql(exp.Cast(this=result, to=timestamp.type))

    return result
def trim_sql(self, expression: exp.Trim) -> str:
    """Generate TRIM, routing binary operands through VARCHAR.

    Operands are rewritten in place via the `_cast_to_varchar` helper, and
    `_gen_with_cast_to_blob` wraps the result so binary inputs come back as
    BLOB. NOTE: this mutates the expression tree (replace() in place).
    """
    expression.this.replace(_cast_to_varchar(expression.this))
    if expression.expression:
        expression.expression.replace(_cast_to_varchar(expression.expression))

    result_sql = super().trim_sql(expression)
    return _gen_with_cast_to_blob(self, expression, result_sql)
def round_sql(self, expression: exp.Round) -> str:
    """Generate ROUND/ROUND_EVEN, normalizing cross-dialect rounding options.

    - `casts_non_integer_decimals`: non-integer scales are cast to INT, since
      DuckDB requires an integer scale (Snowflake casts implicitly).
    - Half-to-even rounding modes map to ROUND_EVEN; half-away-from-zero modes
      map to plain ROUND (DuckDB's default), dropping the mode argument.
    """
    this = expression.this
    decimals = expression.args.get("decimals")
    truncate = expression.args.get("truncate")

    # DuckDB requires the scale (decimals) argument to be an INT
    # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally
    if decimals is not None and expression.args.get("casts_non_integer_decimals"):
        if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
            decimals = exp.cast(decimals, exp.DType.INT)

    func = "ROUND"
    if truncate:
        # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN
        if truncate.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"):
            func = "ROUND_EVEN"
            truncate = None
        # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO
        elif truncate.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"):
            truncate = None

    return self.func(func, this, decimals, truncate)
def strtok_sql(self, expression: exp.Strtok) -> str:
    """Emulate STRTOK (split on a *set* of delimiter characters, skip empties).

    The delimiter string is regex-escaped at runtime with REGEXP_REPLACE and
    wrapped in a character class, the input is split with
    REGEXP_SPLIT_TO_ARRAY, empty tokens are filtered out with LIST_FILTER, and
    the requested 1-based token is indexed. The final SQL comes from
    STRTOK_TEMPLATE with the pieces substituted in. Falls back to a plain
    function call when delimiter or part index is missing.
    """
    string_arg = expression.this
    delimiter_arg = expression.args.get("delimiter")
    part_index_arg = expression.args.get("part_index")

    if delimiter_arg and part_index_arg:
        # Escape regex chars and build character class at runtime using REGEXP_REPLACE
        escaped_delimiter = exp.Anonymous(
            this="REGEXP_REPLACE",
            expressions=[
                delimiter_arg,
                exp.Literal.string(
                    r"([\[\]^.\-*+?(){}|$\\])"
                ),  # Escape problematic regex chars
                exp.Literal.string(
                    r"\\\1"
                ),  # Replace with escaped version using $1 backreference
                exp.Literal.string("g"),  # Global flag
            ],
        )
        # CASE WHEN delimiter = '' THEN '' ELSE CONCAT('[', escaped_delimiter, ']') END
        regex_pattern = (
            exp.case()
            .when(delimiter_arg.eq(exp.Literal.string("")), exp.Literal.string(""))
            .else_(
                exp.func(
                    "CONCAT",
                    exp.Literal.string("["),
                    escaped_delimiter,
                    exp.Literal.string("]"),
                )
            )
        )

        # STRTOK skips empty strings, so we need to filter them out
        # LIST_FILTER(REGEXP_SPLIT_TO_ARRAY(string, pattern), x -> x != '')[index]
        split_array = exp.func("REGEXP_SPLIT_TO_ARRAY", string_arg, regex_pattern)
        x = exp.to_identifier("x")
        is_empty = x.eq(exp.Literal.string(""))
        filtered_array = exp.func(
            "LIST_FILTER",
            split_array,
            exp.Lambda(this=exp.not_(is_empty.copy()), expressions=[x.copy()]),
        )
        base_func = exp.Bracket(
            this=filtered_array,
            expressions=[part_index_arg],
            offset=1,
        )

        # Use template with the built regex pattern
        result = exp.replace_placeholders(
            self.STRTOK_TEMPLATE.copy(),
            string=string_arg,
            delimiter=delimiter_arg,
            part_index=part_index_arg,
            base_func=base_func,
        )

        return self.sql(result)

    return self.function_fallback_sql(expression)
def strtoktoarray_sql(self, expression: exp.StrtokToArray) -> str:
    """Emulate STRTOK_TO_ARRAY via the STRTOK_TO_ARRAY_TEMPLATE.

    The delimiter (default ' ') is regex-escaped with REGEXP_REPLACE so it can
    be used as a character class, then substituted into the template together
    with the input string.
    """
    string_arg = expression.this
    # Snowflake's default delimiter is a single space
    delimiter_arg = expression.args.get("expression") or exp.Literal.string(" ")

    # Escape regex metacharacters in the delimiter using a \1 backreference
    escaped = exp.RegexpReplace(
        this=delimiter_arg.copy(),
        expression=exp.Literal.string(r"([\[\]^.\-*+?(){}|$\\])"),
        replacement=exp.Literal.string(r"\\\1"),
        modifiers=exp.Literal.string("g"),
    )
    return self.sql(
        exp.replace_placeholders(
            self.STRTOK_TO_ARRAY_TEMPLATE.copy(),
            string=string_arg,
            delimiter=delimiter_arg,
            escaped=escaped,
        )
    )
def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
    """Generate APPROX_QUANTILE, casting to DOUBLE when a real type is expected."""
    sql = self.func("APPROX_QUANTILE", expression.this, expression.args.get("quantile"))

    # DuckDB returns integers for APPROX_QUANTILE, cast to DOUBLE if the expected type is a real type
    return (
        f"CAST({sql} AS DOUBLE)"
        if expression.is_type(*exp.DataType.REAL_TYPES)
        else sql
    )
def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
    """
    BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values
    dividing the input distribution into n equal-sized buckets.

    Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery
    does not document the specific algorithm used so results may differ. DuckDB does not
    support RESPECT NULLS.

    The bucket count must be a positive integer literal; otherwise a warning
    is emitted and a plain function call is generated.
    """
    this = expression.this
    if isinstance(this, exp.Distinct):
        # APPROX_QUANTILES requires 2 args and DISTINCT node grabs both
        if len(this.expressions) < 2:
            self.unsupported("APPROX_QUANTILES requires a bucket count argument")
            return self.function_fallback_sql(expression)
        # pop() detaches the bucket count from the DISTINCT node
        num_quantiles_expr = this.expressions[1].pop()
    else:
        num_quantiles_expr = expression.expression

    if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int:
        self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
        return self.function_fallback_sql(expression)

    num_quantiles = t.cast(int, num_quantiles_expr.to_py())
    if num_quantiles <= 0:
        self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
        return self.function_fallback_sql(expression)

    # n+1 evenly spaced quantile points: 0, 1/n, 2/n, ..., 1
    # Decimal keeps the literals exact instead of binary-float approximations
    quantiles = [
        exp.Literal.number(Decimal(i) / Decimal(num_quantiles))
        for i in range(num_quantiles + 1)
    ]

    return self.sql(exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles)))

BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values dividing the input distribution into n equal-sized buckets.

Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery does not document the specific algorithm used so results may differ. DuckDB does not support RESPECT NULLS.

def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str:
    """Generate scalar JSON extraction through the arrow (->>) helper.

    With `scalar_only`, the value is first pulled with JSON_VALUE and then
    re-extracted at the root path '$' — presumably to force a scalar result;
    confirm against the callers that set this flag.
    """
    if expression.args.get("scalar_only"):
        expression = exp.JSONExtractScalar(
            this=rename_func("JSON_VALUE")(self, expression), expression="'$'"
        )
    return _arrow_json_extract_sql(self, expression)
def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str:
    """Generate bitwise NOT (~x), routing binary operands through BIT.

    The operand is cast via the `_cast_to_bit` helper and the result is
    wrapped by `_gen_with_cast_to_blob`, which uses the BINARY type tag set
    here to cast back to BLOB. NOTE: mutates the expression node in place.
    """
    this = expression.this

    if _is_binary(this):
        # Tag the node so _gen_with_cast_to_blob knows to cast the result
        expression.type = exp.DType.BINARY.into_expr()

    arg = _cast_to_bit(this)

    if isinstance(this, exp.Neg):
        # Parenthesize a negation so ~ binds to the whole operand
        arg = exp.Paren(this=arg)

    expression.set("this", arg)

    result_sql = f"~{self.sql(expression, 'this')}"

    return _gen_with_cast_to_blob(self, expression, result_sql)
def window_sql(self, expression: exp.Window) -> str:
    """Route windowed CORR (optionally wrapped in FILTER) to the CORR emulation."""
    inner = expression.this
    wraps_corr = isinstance(inner, exp.Corr) or (
        isinstance(inner, exp.Filter) and isinstance(inner.this, exp.Corr)
    )
    return self._corr_sql(expression) if wraps_corr else super().window_sql(expression)
def filter_sql(self, expression: exp.Filter) -> str:
    """CORR with a FILTER clause needs the dedicated CORR emulation path."""
    if not isinstance(expression.this, exp.Corr):
        return super().filter_sql(expression)

    return self._corr_sql(expression)
def uuid_sql(self, expression: exp.Uuid) -> str:
    """Render a UUID call for DuckDB.

    When both a namespace and a name are present (UUID v5), the call is
    emulated by filling the SHA1-based ``UUID_V5_TEMPLATE`` with those
    arguments; otherwise generation falls back to the base implementation.
    """
    namespace = expression.this
    name = expression.args.get("name")

    if not (namespace and name):
        return super().uuid_sql(expression)

    # UUID v5 (namespace + name) - Emulate using SHA1
    filled = exp.replace_placeholders(
        self.UUID_V5_TEMPLATE.copy(),
        namespace=namespace,
        name=name,
    )
    return self.sql(filled)
Inherited Members
sqlglot.generator.Generator
Generator
NULL_ORDERING_SUPPORTED
WINDOW_FUNCS_WITH_NULL_ORDERING
LOCKING_READS_SUPPORTED
EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
WRAP_DERIVED_VALUES
CREATE_FUNCTION_RETURN_AS
MATCHED_BY_SOURCE
SUPPORTS_MERGE_WHERE
SINGLE_STRING_INTERVAL
INTERVAL_ALLOWS_PLURAL_FORM
LIMIT_ONLY_LITERALS
GROUPINGS_SEP
INDEX_ON
INOUT_SEPARATOR
DIRECTED_JOINS
QUERY_HINT_SEP
IS_BOOL_ALLOWED
DUPLICATE_KEY_UPDATE_WITH_SET
LIMIT_IS_TOP
RETURNING_END
EXTRACT_ALLOWS_QUOTES
TZ_TO_WITH_TIME_ZONE
VALUES_AS_TABLE
ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
UNNEST_WITH_ORDINALITY
AGGREGATE_FILTER_SUPPORTED
COMPUTED_COLUMN_WITH_TYPE
SUPPORTS_TABLE_COPY
TABLESAMPLE_REQUIRES_PARENS
TABLESAMPLE_SIZE_IS_ROWS
TABLESAMPLE_WITH_METHOD
COLLATE_IS_FUNC
DATA_TYPE_SPECIFIERS_ALLOWED
ENSURE_BOOLS
CTE_RECURSIVE_KEYWORD_REQUIRED
SUPPORTS_SINGLE_ARG_CONCAT
SUPPORTS_TABLE_ALIAS_COLUMNS
SUPPORTS_NAMED_CTE_COLUMNS
UNPIVOT_ALIASES_ARE_IDENTIFIERS
INSERT_OVERWRITE
SUPPORTS_SELECT_INTO
SUPPORTS_UNLOGGED_TABLES
SUPPORTS_MODIFY_COLUMN
SUPPORTS_CHANGE_COLUMN
LIKE_PROPERTY_INSIDE_SCHEMA
JSON_TYPE_REQUIRED_FOR_EXTRACTION
JSON_PATH_SINGLE_QUOTE_ESCAPE
SET_OP_MODIFIERS
COPY_PARAMS_ARE_WRAPPED
COPY_PARAMS_EQ_REQUIRED
TRY_SUPPORTED
SUPPORTS_UESCAPE
UNICODE_SUBSTITUTE
HEX_FUNC
WITH_PROPERTIES_PREFIX
QUOTE_JSON_PATH
SUPPORTS_EXPLODING_PROJECTIONS
ARRAY_CONCAT_IS_VAR_LEN
SUPPORTS_CONVERT_TIMEZONE
SUPPORTS_MEDIAN
SUPPORTS_UNIX_SECONDS
ALTER_SET_WRAPPED
PARSE_JSON_NAME
ARRAY_SIZE_NAME
ALTER_SET_TYPE
SUPPORTS_BETWEEN_FLAGS
MATCH_AGAINST_TABLE_PREFIX
DECLARE_DEFAULT_ASSIGNMENT
UPDATE_STATEMENT_SUPPORTS_FROM
STAR_EXCLUDE_REQUIRES_DERIVED_TABLE
UNSUPPORTED_TYPES
TIME_PART_SINGULARS
TOKEN_MAPPING
EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
WITH_SEPARATED_COMMENTS
EXCLUDE_COMMENTS
PARAMETERIZABLE_TEXT_TYPES
EXPRESSIONS_WITHOUT_NESTED_CTES
RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
SAFE_JSON_PATH_KEY_RE
SENTINEL_LINE_BREAK
pretty
identify
normalize
pad
unsupported_level
max_unsupported
leading_comma
max_text_width
comments
dialect
normalize_functions
unsupported_messages
generate
preprocess
unsupported
sep
seg
sanitize_comment
maybe_comment
wrap
no_identify
normalize_func
indent
sql
uncache_sql
cache_sql
characterset_sql
column_parts
column_sql
pseudocolumn_sql
columnposition_sql
columndef_sql
columnconstraint_sql
computedcolumnconstraint_sql
compresscolumnconstraint_sql
generatedasidentitycolumnconstraint_sql
generatedasrowcolumnconstraint_sql
periodforsystemtimeconstraint_sql
notnullcolumnconstraint_sql
primarykeycolumnconstraint_sql
uniquecolumnconstraint_sql
inoutcolumnconstraint_sql
createable_sql
create_sql
sequenceproperties_sql
triggerproperties_sql
triggerreferencing_sql
triggerevent_sql
clone_sql
describe_sql
heredoc_sql
prepend_ctes
with_sql
cte_sql
tablealias_sql
bitstring_sql
bytestring_sql
unicodestring_sql
rawstring_sql
datatypeparam_sql
datatype_param_bound_limiter
datatype_sql
directory_sql
delete_sql
drop_sql
set_operation
set_operations
fetch_sql
limitoptions_sql
hint_sql
indexparameters_sql
index_sql
identifier_sql
lowerhex_sql
inputoutputformat_sql
national_sql
partition_sql
properties_sql
root_properties
properties
with_properties
locate_properties
property_name
property_sql
uuidproperty_sql
likeproperty_sql
fallbackproperty_sql
journalproperty_sql
freespaceproperty_sql
checksumproperty_sql
mergeblockratioproperty_sql
moduleproperty_sql
datablocksizeproperty_sql
blockcompressionproperty_sql
isolatedloadingproperty_sql
partitionboundspec_sql
partitionedofproperty_sql
lockingproperty_sql
withdataproperty_sql
withsystemversioningproperty_sql
insert_sql
introducer_sql
kill_sql
pseudotype_sql
objectidentifier_sql
onconflict_sql
returning_sql
rowformatdelimitedproperty_sql
withtablehint_sql
indextablehint_sql
historicaldata_sql
table_parts
table_sql
pivot_sql
version_sql
tuple_sql
update_sql
values_sql
var_sql
into_sql
from_sql
groupingsets_sql
rollup_sql
rollupindex_sql
rollupproperty_sql
cube_sql
group_sql
having_sql
connect_sql
prior_sql
lateral_op
lateral_sql
limit_sql
offset_sql
setitem_sql
set_sql
queryband_sql
pragma_sql
lock_sql
literal_sql
escape_str
loaddata_sql
null_sql
boolean_sql
booland_sql
boolor_sql
order_sql
withfill_sql
cluster_sql
distribute_sql
sort_sql
ordered_sql
matchrecognizemeasure_sql
matchrecognize_sql
query_modifiers
options_modifier
for_modifiers
queryoption_sql
offset_limit_modifiers
after_limit_modifiers
select_sql
schema_sql
schema_columns_sql
star_sql
parameter_sql
sessionparameter_sql
placeholder_sql
subquery_sql
qualify_sql
prewhere_sql
where_sql
partition_by_sql
windowspec_sql
between_sql
bracket_offset_expressions
all_sql
any_sql
exists_sql
case_sql
constraint_sql
nextvaluefor_sql
convert_concat_args
concat_sql
check_sql
foreignkey_sql
primarykey_sql
if_sql
matchagainst_sql
jsonkeyvalue_sql
jsonpath_sql
json_path_part
formatjson_sql
formatphrase_sql
jsonarray_sql
jsonarrayagg_sql
jsoncolumndef_sql
jsonschema_sql
jsontable_sql
openjsoncolumndef_sql
openjson_sql
in_sql
in_unnest_op
interval_sql
return_sql
reference_sql
anonymous_sql
paren_sql
neg_sql
not_sql
alias_sql
pivotalias_sql
atindex_sql
attimezone_sql
fromtimezone_sql
add_sql
and_sql
or_sql
xor_sql
connector_sql
bitwiseand_sql
bitwiseleftshift_sql
bitwiseor_sql
bitwiserightshift_sql
cast_sql
command_sql
comment_sql
mergetreettlaction_sql
mergetreettl_sql
transaction_sql
commit_sql
rollback_sql
altercolumn_sql
modifycolumn_sql
alterindex_sql
alterdiststyle_sql
altersortkey_sql
alterrename_sql
renamecolumn_sql
alterset_sql
alter_sql
altersession_sql
add_column_sql
droppartition_sql
dropprimarykey_sql
addconstraint_sql
addpartition_sql
distinct_sql
havingmax_sql
intdiv_sql
dpipe_sql
div_sql
safedivide_sql
overlaps_sql
distance_sql
dot_sql
eq_sql
propertyeq_sql
escape_sql
glob_sql
gt_sql
gte_sql
is_sql
like_sql
ilike_sql
match_sql
similarto_sql
lt_sql
lte_sql
mod_sql
mul_sql
neq_sql
nullsafeeq_sql
nullsafeneq_sql
sub_sql
trycast_sql
jsoncast_sql
try_sql
log_sql
use_sql
binary
ceil_floor
function_fallback_sql
func
format_args
too_wide
format_time
expressions
op_expressions
naked_property
tag_sql
token_sql
userdefinedfunction_sql
joinhint_sql
kwarg_sql
when_sql
whens_sql
merge_sql
tochar_sql
dictproperty_sql
dictrange_sql
dictsubproperty_sql
duplicatekeyproperty_sql
uniquekeyproperty_sql
distributedbyproperty_sql
oncluster_sql
clusteredbyproperty_sql
anyvalue_sql
querytransform_sql
indexconstraintoption_sql
checkcolumnconstraint_sql
indexcolumnconstraint_sql
nvl2_sql
comprehension_sql
columnprefix_sql
opclass_sql
predict_sql
generateembedding_sql
generatetext_sql
generatetable_sql
generatebool_sql
generateint_sql
generatedouble_sql
mltranslate_sql
mlforecast_sql
aiforecast_sql
featuresattime_sql
vectorsearch_sql
forin_sql
refresh_sql
toarray_sql
tsordstotimestamp_sql
tsordstodatetime_sql
tsordstodate_sql
unixdate_sql
lastday_sql
dateadd_sql
arrayany_sql
struct_sql
partitionrange_sql
truncatetable_sql
convert_sql
copyparameter_sql
credentials_sql
copy_sql
semicolon_sql
datadeletionproperty_sql
maskingpolicycolumnconstraint_sql
gapfill_sql
scope_resolution
scoperesolution_sql
changes_sql
summarize_sql
explodinggenerateseries_sql
converttimezone_sql
json_sql
jsonvalue_sql
skipjsoncolumn_sql
conditionalinsert_sql
multitableinserts_sql
oncondition_sql
jsonextractquote_sql
jsonexists_sql
arrayagg_sql
slice_sql
apply_sql
grant_sql
revoke_sql
grantprivilege_sql
grantprincipal_sql
columns_sql
overlay_sql
todouble_sql
string_sql
median_sql
overflowtruncatebehavior_sql
unixseconds_sql
arraysize_sql
attach_sql
detach_sql
attachoption_sql
watermarkcolumnconstraint_sql
encodeproperty_sql
includeproperty_sql
xmlelement_sql
xmlkeyvalueoption_sql
partitionbyrangeproperty_sql
partitionbyrangepropertydynamic_sql
unpivotcolumns_sql
analyzesample_sql
analyzestatistics_sql
analyzehistogram_sql
analyzedelete_sql
analyzelistchainedrows_sql
analyzevalidate_sql
analyze_sql
xmltable_sql
xmlnamespace_sql
export_sql
declare_sql
declareitem_sql
recursivewithsearch_sql
parameterizedagg_sql
anonymousaggfunc_sql
combinedaggfunc_sql
combinedparameterizedagg_sql
get_put_sql
translatecharacters_sql
decodecase_sql
semanticview_sql
getextract_sql
datefromunixdate_sql
buildproperty_sql
refreshtriggerproperty_sql
modelattribute_sql
directorystage_sql
initcap_sql
localtime_sql
localtimestamp_sql
weekstart_sql
block_sql
storedprocedure_sql
ifblock_sql
whileblock_sql
execute_sql
executesql_sql
altermodifysqlsecurity_sql
usingproperty_sql
renameindex_sql